From 95a7713713eeca6bf61ba2fc97725c4d3f5d3035 Mon Sep 17 00:00:00 2001
From: notch
Date: Thu, 14 Jan 2021 08:05:54 +0800
Subject: [PATCH] parse h265 metadata

---
 av/codec/hevc/const.go     |  58 +++
 av/codec/hevc/shortcut.go  |  30 ++
 av/codec/hevc/sps.go       | 714 +++++++++++++++++++++++++++++++++++++
 av/codec/hevc/sps_test.go  |  66 ++++
 av/codec/hevc/vps.go       | 493 +++++++++++++++++++++++++
 av/codec/hevc/vps_test.go  |  48 +++
 av/format/sdp/parsemeta.go |  41 ++-
 7 files changed, 1449 insertions(+), 1 deletion(-)
 create mode 100644 av/codec/hevc/shortcut.go
 create mode 100644 av/codec/hevc/sps.go
 create mode 100644 av/codec/hevc/sps_test.go
 create mode 100644 av/codec/hevc/vps.go
 create mode 100644 av/codec/hevc/vps_test.go

diff --git a/av/codec/hevc/const.go b/av/codec/hevc/const.go
index 3f2fa27..0ef608f 100755
--- a/av/codec/hevc/const.go
+++ b/av/codec/hevc/const.go
@@ -121,3 +121,61 @@ static int hevc_probe(char* pbuf, int buf_size)
 	return 0;
 }
 **/
+// Upper bounds used to size the parser's arrays; each comment cites the clause the limit comes from.
+const (
+	// 7.4.3.1: vps_max_layers_minus1 is in [0, 62].
+	HEVC_MAX_LAYERS = 63
+	// 7.4.3.1: vps_max_sub_layers_minus1 is in [0, 6].
+	HEVC_MAX_SUB_LAYERS = 7
+	// 7.4.3.1: vps_num_layer_sets_minus1 is in [0, 1023].
+	HEVC_MAX_LAYER_SETS = 1024
+
+	// 7.4.2.1: vps_video_parameter_set_id is u(4).
+	HEVC_MAX_VPS_COUNT = 16
+	// 7.4.3.2.1: sps_seq_parameter_set_id is in [0, 15].
+	HEVC_MAX_SPS_COUNT = 16
+	// 7.4.3.3.1: pps_pic_parameter_set_id is in [0, 63].
+	HEVC_MAX_PPS_COUNT = 64
+
+	// A.4.2: MaxDpbSize is bounded above by 16.
+	HEVC_MAX_DPB_SIZE = 16
+	// 7.4.3.1: vps_max_dec_pic_buffering_minus1[i] is in [0, MaxDpbSize - 1].
+	HEVC_MAX_REFS = HEVC_MAX_DPB_SIZE
+
+	// 7.4.3.2.1: num_short_term_ref_pic_sets is in [0, 64].
+	HEVC_MAX_SHORT_TERM_REF_PIC_SETS = 64
+	// 7.4.3.2.1: num_long_term_ref_pics_sps is in [0, 32].
+	HEVC_MAX_LONG_TERM_REF_PICS = 32
+
+	// A.3: all profiles require that CtbLog2SizeY is in [4, 6].
+	HEVC_MIN_LOG2_CTB_SIZE = 4
+	HEVC_MAX_LOG2_CTB_SIZE = 6
+
+	// E.3.2: cpb_cnt_minus1[i] is in [0, 31].
+	HEVC_MAX_CPB_CNT = 32
+
+	// A.4.1: in table A.6 the highest level allows a MaxLumaPs of 35 651 584.
+	HEVC_MAX_LUMA_PS = 35651584
+	// A.4.1: pic_width_in_luma_samples and pic_height_in_luma_samples are
+	// constrained to be not greater than sqrt(MaxLumaPs * 8). Hence height/
+	// width are bounded above by sqrt(8 * 35651584) = 16888.2 samples.
+	HEVC_MAX_WIDTH  = 16888
+	HEVC_MAX_HEIGHT = 16888
+
+	// A.4.1: table A.6 allows at most 22 tile rows for any level.
+	HEVC_MAX_TILE_ROWS = 22
+	// A.4.1: table A.6 allows at most 20 tile columns for any level.
+	HEVC_MAX_TILE_COLUMNS = 20
+
+	// A.4.2: table A.6 allows at most 600 slice segments for any level.
+	HEVC_MAX_SLICE_SEGMENTS = 600
+
+	// 7.4.7.1: in the worst case (tiles_enabled_flag and
+	// entropy_coding_sync_enabled_flag are both set), entry points can be
+	// placed at the beginning of every Ctb row in every tile, giving an
+	// upper bound of (num_tile_columns_minus1 + 1) * PicHeightInCtbsY - 1.
+	// Only a stream with very high resolution and perverse parameters could
+	// get near that, though, so set a lower limit here with the maximum
+	// possible value for 4K video (at most 135 16x16 Ctb rows).
+	HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135
+)
diff --git a/av/codec/hevc/shortcut.go b/av/codec/hevc/shortcut.go
new file mode 100644
index 0000000..2f6a46b
--- /dev/null
+++ b/av/codec/hevc/shortcut.go
@@ -0,0 +1,30 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package hevc
+
+import "github.com/cnotch/ipchub/av/codec"
+
+// MetadataIsReady reports whether vm holds VPS, SPS and PPS data, decoding the SPS into vm while Width is still zero.
+func MetadataIsReady(vm *codec.VideoMeta) bool {
+	// All three parameter sets have to be present.
+	if len(vm.Vps) == 0 || len(vm.Sps) == 0 || len(vm.Pps) == 0 {
+		return false
+	}
+
+	// The SPS is decoded only while Width is still unset (zero).
+	if vm.Width != 0 {
+		return true
+	}
+
+	var parsed H265RawSPS
+	if parsed.Decode(vm.Sps) != nil {
+		return false
+	}
+	vm.Width = parsed.Width()
+	vm.Height = parsed.Height()
+	vm.FixedFrameRate = parsed.IsFixedFrameRate()
+	vm.FrameRate = parsed.FrameRate()
+	return true
+}
diff --git a/av/codec/hevc/sps.go b/av/codec/hevc/sps.go
new file mode 100644
index 0000000..45b210d
--- /dev/null
+++ b/av/codec/hevc/sps.go
@@ -0,0 +1,714 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package hevc
+
+import (
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"runtime/debug"
+
+	"github.com/cnotch/ipchub/utils"
+	"github.com/cnotch/ipchub/utils/bits"
+)
+
+// H265RawScalingList stores the scaling-list syntax elements exactly as they
+// are read from the bitstream, indexed by [sizeId][matrixId].
+type H265RawScalingList struct {
+	scaling_list_pred_mode_flag       [4][6]uint8
+	scaling_list_pred_matrix_id_delta [4][6]uint8
+	scaling_list_dc_coef_minus8       [4][6]int16
+	scaling_list_delta_coeff          [4][6][64]int8
+}
+
+// decode reads the scaling-list elements from r for every sizeId/matrixId
+// pair, in bitstream order. It always returns nil.
+func (sl *H265RawScalingList) decode(r *bits.Reader) error {
+	for sizeId := 0; sizeId < 4; sizeId++ {
+		// sizeId 3 visits matrixId 0 and 3 only; sizeId 0 carries 16
+		// coefficients per matrix and every other size is capped at 64.
+		stride, coefCount := 1, 64
+		switch sizeId {
+		case 0:
+			coefCount = 16
+		case 3:
+			stride = 3
+		}
+
+		for matrixId := 0; matrixId < 6; matrixId += stride {
+			predMode := r.ReadBit()
+			sl.scaling_list_pred_mode_flag[sizeId][matrixId] = predMode
+			if predMode == 0 {
+				// The list is derived from another one; only the delta is coded.
+				sl.scaling_list_pred_matrix_id_delta[sizeId][matrixId] = r.ReadUe8()
+				continue
+			}
+
+			if sizeId >= 2 {
+				sl.scaling_list_dc_coef_minus8[sizeId-2][matrixId] = r.ReadSe16()
+			}
+			coeffs := &sl.scaling_list_delta_coeff[sizeId][matrixId]
+			for k := 0; k < coefCount; k++ {
+				coeffs[k] = r.ReadSe8()
+			}
+		}
+	}
+	return nil
+}
+
+// H265RawVUI stores the VUI syntax elements of an SPS, either as read from
+// the bitstream or as the defaults assigned when they are absent.
+type H265RawVUI struct {
+	aspect_ratio_info_present_flag uint8
+	aspect_ratio_idc               uint8
+	sar_width                      uint16
+	sar_height                     uint16
+
+	overscan_info_present_flag uint8
+	overscan_appropriate_flag  uint8
+
+	video_signal_type_present_flag  uint8
+	video_format                    uint8
+	video_full_range_flag           uint8
+	colour_description_present_flag uint8
+	colour_primaries                uint8
+	transfer_characteristics        uint8
+	matrix_coefficients             uint8
+
+	chroma_loc_info_present_flag        uint8
+	chroma_sample_loc_type_top_field    uint8
+	chroma_sample_loc_type_bottom_field uint8
+
+	neutral_chroma_indication_flag uint8
+	field_seq_flag                 uint8
+	frame_field_info_present_flag  uint8
+
+	default_display_window_flag uint8
+	def_disp_win_left_offset    uint16
+	def_disp_win_right_offset   uint16
+	def_disp_win_top_offset     uint16
+	def_disp_win_bottom_offset  uint16
+
+	vui_timing_info_present_flag        uint8
+	vui_num_units_in_tick               uint32
+	vui_time_scale                      uint32
+	vui_poc_proportional_to_timing_flag uint8
+	vui_num_ticks_poc_diff_one_minus1   uint32
+	vui_hrd_parameters_present_flag     uint8
+	hrd_parameters                      H265RawHRDParameters
+
+	bitstream_restriction_flag              uint8
+	tiles_fixed_structure_flag              uint8
+	motion_vectors_over_pic_boundaries_flag uint8
+	restricted_ref_pic_lists_flag           uint8
+	min_spatial_segmentation_idc            uint16
+	max_bytes_per_pic_denom                 uint8
+	max_bits_per_min_cu_denom               uint8
+	log2_max_mv_length_horizontal           uint8
+	log2_max_mv_length_vertical             uint8
+}
+
+// setDefault assigns the values used for VUI elements that are not present
+// in the bitstream. sps is accepted for symmetry with decode and is unused.
+func (vui *H265RawVUI) setDefault(sps *H265RawSPS) {
+	vui.aspect_ratio_idc = 0
+
+	vui.video_format = 5
+	vui.video_full_range_flag = 0
+	vui.colour_primaries = 2
+	vui.transfer_characteristics = 2
+	vui.matrix_coefficients = 2
+
+	vui.chroma_sample_loc_type_top_field = 0
+	vui.chroma_sample_loc_type_bottom_field = 0
+
+	vui.tiles_fixed_structure_flag = 0
+	vui.motion_vectors_over_pic_boundaries_flag = 1
+	vui.min_spatial_segmentation_idc = 0
+	vui.max_bytes_per_pic_denom = 2
+	vui.max_bits_per_min_cu_denom = 1
+	vui.log2_max_mv_length_horizontal = 15
+	vui.log2_max_mv_length_vertical = 15
+}
+
+// decode reads the VUI parameters from r. Elements belonging to an optional
+// group that is not signalled keep the values assigned by setDefault. sps
+// supplies sps_max_sub_layers_minus1 for the embedded HRD parameters. The
+// only error returned is one reported by the HRD parameter decoder.
+func (vui *H265RawVUI) decode(r *bits.Reader, sps *H265RawSPS) error {
+	// Start from the defaults so that each absent group needs no else branch.
+	vui.setDefault(sps)
+
+	vui.aspect_ratio_info_present_flag = r.ReadBit()
+	if vui.aspect_ratio_info_present_flag == 1 {
+		vui.aspect_ratio_idc = r.ReadUint8(8)
+		if vui.aspect_ratio_idc == 255 {
+			// aspect_ratio_idc 255 is followed by an explicit width/height pair.
+			vui.sar_width = r.ReadUint16(16)
+			vui.sar_height = r.ReadUint16(16)
+		}
+	}
+
+	vui.overscan_info_present_flag = r.ReadBit()
+	if vui.overscan_info_present_flag == 1 {
+		vui.overscan_appropriate_flag = r.ReadBit()
+	}
+
+	vui.video_signal_type_present_flag = r.ReadBit()
+	if vui.video_signal_type_present_flag == 1 {
+		vui.video_format = r.ReadUint8(3)
+		vui.video_full_range_flag = r.ReadBit()
+		vui.colour_description_present_flag = r.ReadBit()
+		if vui.colour_description_present_flag == 1 {
+			vui.colour_primaries = r.ReadUint8(8)
+			vui.transfer_characteristics = r.ReadUint8(8)
+			vui.matrix_coefficients = r.ReadUint8(8)
+		}
+	}
+
+	vui.chroma_loc_info_present_flag = r.ReadBit()
+	if vui.chroma_loc_info_present_flag == 1 {
+		vui.chroma_sample_loc_type_top_field = r.ReadUe8()
+		vui.chroma_sample_loc_type_bottom_field = r.ReadUe8()
+	}
+
+	vui.neutral_chroma_indication_flag = r.ReadBit()
+	vui.field_seq_flag = r.ReadBit()
+	vui.frame_field_info_present_flag = r.ReadBit()
+
+	vui.default_display_window_flag = r.ReadBit()
+	if vui.default_display_window_flag == 1 {
+		vui.def_disp_win_left_offset = r.ReadUe16()
+		vui.def_disp_win_right_offset = r.ReadUe16()
+		vui.def_disp_win_top_offset = r.ReadUe16()
+		vui.def_disp_win_bottom_offset = r.ReadUe16()
+	}
+
+	vui.vui_timing_info_present_flag = r.ReadBit()
+	if vui.vui_timing_info_present_flag == 1 {
+		vui.vui_num_units_in_tick = r.ReadUint32(32)
+		vui.vui_time_scale = r.ReadUint32(32)
+		vui.vui_poc_proportional_to_timing_flag = r.ReadBit()
+		if vui.vui_poc_proportional_to_timing_flag == 1 {
+			vui.vui_num_ticks_poc_diff_one_minus1 = r.ReadUe()
+		}
+
+		vui.vui_hrd_parameters_present_flag = r.ReadBit()
+		if vui.vui_hrd_parameters_present_flag == 1 {
+			if err := vui.hrd_parameters.decode(r, true, int(sps.sps_max_sub_layers_minus1)); err != nil {
+				return err
+			}
+		}
+	}
+
+	vui.bitstream_restriction_flag = r.ReadBit()
+	if vui.bitstream_restriction_flag == 1 {
+		vui.tiles_fixed_structure_flag = r.ReadBit()
+		vui.motion_vectors_over_pic_boundaries_flag = r.ReadBit()
+		vui.restricted_ref_pic_lists_flag = r.ReadBit()
+		vui.min_spatial_segmentation_idc = r.ReadUe16()
+		vui.max_bytes_per_pic_denom = r.ReadUe8()
+		vui.max_bits_per_min_cu_denom = r.ReadUe8()
+		vui.log2_max_mv_length_horizontal = r.ReadUe8()
+		vui.log2_max_mv_length_vertical = r.ReadUe8()
+	}
+
+	return nil
+}
+
+// H265RawSTRefPicSet is one short-term reference picture set. Whatever form
+// it had in the bitstream, decode leaves num_negative_pics/num_positive_pics
+// and the delta_poc_s{0,1}_minus1 / used_by_curr_pic_s{0,1}_flag arrays
+// filled in, so that a later set can be predicted from it.
+type H265RawSTRefPicSet struct {
+	inter_ref_pic_set_prediction_flag uint8
+
+	delta_idx_minus1     uint8
+	delta_rps_sign       uint8
+	abs_delta_rps_minus1 uint16
+
+	used_by_curr_pic_flag [HEVC_MAX_REFS]uint8
+	use_delta_flag        [HEVC_MAX_REFS]uint8
+
+	num_negative_pics        uint8
+	num_positive_pics        uint8
+	delta_poc_s0_minus1      [HEVC_MAX_REFS]uint16
+	used_by_curr_pic_s0_flag [HEVC_MAX_REFS]uint8
+	delta_poc_s1_minus1      [HEVC_MAX_REFS]uint16
+	used_by_curr_pic_s1_flag [HEVC_MAX_REFS]uint8
+}
+
+// decode reads the set with index st_rps_idx from r. sps provides the sets
+// decoded so far (a predicted set refers to one of them) and
+// num_short_term_ref_pic_sets. An error is returned when the stream asks for
+// more pictures than the fixed-size arrays hold or refers to a missing set.
+func (ps *H265RawSTRefPicSet) decode(r *bits.Reader, st_rps_idx uint8, sps *H265RawSPS) error {
+	if st_rps_idx != 0 {
+		ps.inter_ref_pic_set_prediction_flag = r.ReadBit()
+	} else {
+		// The first set cannot be predicted, so the flag is not coded.
+		ps.inter_ref_pic_set_prediction_flag = 0
+	}
+
+	if ps.inter_ref_pic_set_prediction_flag == 1 {
+		return ps.decodePredicted(r, st_rps_idx, sps)
+	}
+	return ps.decodeExplicit(r, st_rps_idx)
+}
+
+// decodeExplicit reads a set that is coded directly as delta steps.
+func (ps *H265RawSTRefPicSet) decodeExplicit(r *bits.Reader, st_rps_idx uint8) error {
+	ps.num_negative_pics = r.ReadUe8()
+	ps.num_positive_pics = r.ReadUe8()
+	// Both counts index [HEVC_MAX_REFS] arrays below; reject values that
+	// would run past them instead of relying on a recovered panic.
+	if ps.num_negative_pics > HEVC_MAX_REFS || ps.num_positive_pics > HEVC_MAX_REFS {
+		return fmt.Errorf("Invalid stream: short-term ref pic set %d contains too many pictures.\n", st_rps_idx)
+	}
+
+	for i := 0; i < int(ps.num_negative_pics); i++ {
+		ps.delta_poc_s0_minus1[i] = r.ReadUe16()
+		ps.used_by_curr_pic_s0_flag[i] = r.ReadBit()
+	}
+
+	for i := 0; i < int(ps.num_positive_pics); i++ {
+		ps.delta_poc_s1_minus1[i] = r.ReadUe16()
+		ps.used_by_curr_pic_s1_flag[i] = r.ReadBit()
+	}
+	return nil
+}
+
+// decodePredicted reads a set that is coded as a prediction from an earlier
+// set and rebuilds its delta-step form.
+func (ps *H265RawSTRefPicSet) decodePredicted(r *bits.Reader, st_rps_idx uint8, sps *H265RawSPS) error {
+	var ref_delta_poc_s0, ref_delta_poc_s1, delta_poc_s0, delta_poc_s1 [HEVC_MAX_REFS]int
+	var used_by_curr_pic_s0, used_by_curr_pic_s1 [HEVC_MAX_REFS]uint8
+
+	if st_rps_idx == sps.num_short_term_ref_pic_sets {
+		ps.delta_idx_minus1 = r.ReadUe8()
+	} else {
+		ps.delta_idx_minus1 = 0
+	}
+
+	// Computed in int: the uint8 subtraction wraps when the delta is larger
+	// than the index.
+	ref_rps_idx := int(st_rps_idx) - (int(ps.delta_idx_minus1) + 1)
+	if ref_rps_idx < 0 || ref_rps_idx >= len(sps.st_ref_pic_set) {
+		return fmt.Errorf("invalid stream: short-term ref pic set %d is predicted from missing set %d", st_rps_idx, ref_rps_idx)
+	}
+	ref := &sps.st_ref_pic_set[ref_rps_idx]
+	num_negative := int(ref.num_negative_pics)
+	num_positive := int(ref.num_positive_pics)
+	num_delta_pocs := num_negative + num_positive
+	// Index num_delta_pocs itself is used below, so it has to stay inside
+	// the [HEVC_MAX_REFS] arrays.
+	if num_delta_pocs >= HEVC_MAX_DPB_SIZE {
+		return fmt.Errorf("invalid stream: short-term ref pic set %d is predicted from a set with %d pictures", st_rps_idx, num_delta_pocs)
+	}
+
+	ps.delta_rps_sign = r.ReadBit()
+	ps.abs_delta_rps_minus1 = r.ReadUe16()
+	// Widen before the arithmetic: in uint8, 1 - 2*1 wraps to 255 instead
+	// of -1, and abs_delta_rps_minus1+1 can wrap in uint16.
+	delta_rps := (1 - 2*int(ps.delta_rps_sign)) * (int(ps.abs_delta_rps_minus1) + 1)
+
+	num_ref_pics := 0
+	for j := 0; j <= num_delta_pocs; j++ {
+		ps.used_by_curr_pic_flag[j] = r.ReadBit()
+		if ps.used_by_curr_pic_flag[j] == 0 {
+			ps.use_delta_flag[j] = r.ReadBit()
+		} else {
+			ps.use_delta_flag[j] = 1
+		}
+		if ps.use_delta_flag[j] == 1 {
+			num_ref_pics++
+		}
+	}
+	if num_ref_pics >= HEVC_MAX_DPB_SIZE {
+		return fmt.Errorf("Invalid stream: short-term ref pic set %d contains too many pictures.\n", st_rps_idx)
+	}
+
+	// Since the stored form of an RPS here is actually the delta-step
+	// form used when inter_ref_pic_set_prediction_flag is not set, we
+	// need to reconstruct that here in order to be able to refer to
+	// the RPS later (which is required for parsing, because we don't
+	// even know what syntax elements appear without it). Therefore,
+	// this code takes the delta-step form of the reference set, turns
+	// it into the delta-array form, applies the prediction process of
+	// 7.4.8, converts the result back to the delta-step form, and
+	// stores that as the current set for future use. Note that the
+	// inferences here mean that writers using prediction will need
+	// to fill in the delta-step values correctly as well - since the
+	// whole RPS prediction process is somewhat overly sophisticated,
+	// this hopefully forms a useful check for them to ensure their
+	// predicted form actually matches what was intended rather than
+	// an onerous additional requirement.
+
+	d_poc := 0
+	for i := 0; i < num_negative; i++ {
+		d_poc -= int(ref.delta_poc_s0_minus1[i]) + 1
+		ref_delta_poc_s0[i] = d_poc
+	}
+	d_poc = 0
+	for i := 0; i < num_positive; i++ {
+		d_poc += int(ref.delta_poc_s1_minus1[i]) + 1
+		ref_delta_poc_s1[i] = d_poc
+	}
+
+	// Negative list. The counters are int so that the descending loops end
+	// at -1; each flag is stored in the same slot as its delta before the
+	// slot index advances.
+	n := 0
+	for j := num_positive - 1; j >= 0; j-- {
+		d_poc = ref_delta_poc_s1[j] + delta_rps
+		if d_poc < 0 && ps.use_delta_flag[num_negative+j] == 1 {
+			delta_poc_s0[n] = d_poc
+			used_by_curr_pic_s0[n] = ps.used_by_curr_pic_flag[num_negative+j]
+			n++
+		}
+	}
+	if delta_rps < 0 && ps.use_delta_flag[num_delta_pocs] == 1 {
+		delta_poc_s0[n] = delta_rps
+		used_by_curr_pic_s0[n] = ps.used_by_curr_pic_flag[num_delta_pocs]
+		n++
+	}
+	for j := 0; j < num_negative; j++ {
+		d_poc = ref_delta_poc_s0[j] + delta_rps
+		if d_poc < 0 && ps.use_delta_flag[j] == 1 {
+			delta_poc_s0[n] = d_poc
+			used_by_curr_pic_s0[n] = ps.used_by_curr_pic_flag[j]
+			n++
+		}
+	}
+
+	ps.num_negative_pics = uint8(n)
+	for i := 0; i < n; i++ {
+		prev := 0
+		if i > 0 {
+			prev = delta_poc_s0[i-1]
+		}
+		ps.delta_poc_s0_minus1[i] = uint16(-(delta_poc_s0[i] - prev) - 1)
+		ps.used_by_curr_pic_s0_flag[i] = used_by_curr_pic_s0[i]
+	}
+
+	// Positive list, built the same way.
+	n = 0
+	for j := num_negative - 1; j >= 0; j-- {
+		d_poc = ref_delta_poc_s0[j] + delta_rps
+		if d_poc > 0 && ps.use_delta_flag[j] == 1 {
+			delta_poc_s1[n] = d_poc
+			used_by_curr_pic_s1[n] = ps.used_by_curr_pic_flag[j]
+			n++
+		}
+	}
+	if delta_rps > 0 && ps.use_delta_flag[num_delta_pocs] == 1 {
+		delta_poc_s1[n] = delta_rps
+		used_by_curr_pic_s1[n] = ps.used_by_curr_pic_flag[num_delta_pocs]
+		n++
+	}
+	for j := 0; j < num_positive; j++ {
+		d_poc = ref_delta_poc_s1[j] + delta_rps
+		if d_poc > 0 && ps.use_delta_flag[num_negative+j] == 1 {
+			delta_poc_s1[n] = d_poc
+			used_by_curr_pic_s1[n] = ps.used_by_curr_pic_flag[num_negative+j]
+			n++
+		}
+	}
+
+	// The count is the number of entries collected above, not a constant.
+	ps.num_positive_pics = uint8(n)
+	for i := 0; i < n; i++ {
+		prev := 0
+		if i > 0 {
+			prev = delta_poc_s1[i-1]
+		}
+		ps.delta_poc_s1_minus1[i] = uint16(delta_poc_s1[i] - prev - 1)
+		ps.used_by_curr_pic_s1_flag[i] = used_by_curr_pic_s1[i]
+	}
+
+	return nil
+}
+
+// H265RawSPS holds the syntax elements of a sequence parameter set as they
+// are read by Decode. The extension payloads are not parsed; only the
+// extension flags are kept.
+type H265RawSPS struct {
+	nal_unit_header H265RawNALUnitHeader
+
+	sps_video_parameter_set_id uint8
+
+	sps_max_sub_layers_minus1    uint8
+	sps_temporal_id_nesting_flag uint8
+
+	profile_tier_level H265RawProfileTierLevel
+
+	sps_seq_parameter_set_id uint8
+
+	chroma_format_idc          uint8
+	separate_colour_plane_flag uint8
+
+	pic_width_in_luma_samples  uint16
+	pic_height_in_luma_samples uint16
+
+	conformance_window_flag uint8
+	conf_win_left_offset    uint16
+	conf_win_right_offset   uint16
+	conf_win_top_offset     uint16
+	conf_win_bottom_offset  uint16
+
+	bit_depth_luma_minus8   uint8
+	bit_depth_chroma_minus8 uint8
+
+	log2_max_pic_order_cnt_lsb_minus4 uint8
+
+	sps_sub_layer_ordering_info_present_flag uint8
+	sps_max_dec_pic_buffering_minus1         [HEVC_MAX_SUB_LAYERS]uint8
+	sps_max_num_reorder_pics                 [HEVC_MAX_SUB_LAYERS]uint8
+	sps_max_latency_increase_plus1           [HEVC_MAX_SUB_LAYERS]uint32
+
+	log2_min_luma_coding_block_size_minus3      uint8
+	log2_diff_max_min_luma_coding_block_size    uint8
+	log2_min_luma_transform_block_size_minus2   uint8
+	log2_diff_max_min_luma_transform_block_size uint8
+	max_transform_hierarchy_depth_inter         uint8
+	max_transform_hierarchy_depth_intra         uint8
+
+	scaling_list_enabled_flag          uint8
+	sps_scaling_list_data_present_flag uint8
+	scaling_list                       *H265RawScalingList
+
+	amp_enabled_flag                    uint8
+	sample_adaptive_offset_enabled_flag uint8
+
+	pcm_enabled_flag                             uint8
+	pcm_sample_bit_depth_luma_minus1             uint8
+	pcm_sample_bit_depth_chroma_minus1           uint8
+	log2_min_pcm_luma_coding_block_size_minus3   uint8
+	log2_diff_max_min_pcm_luma_coding_block_size uint8
+	pcm_loop_filter_disabled_flag                uint8
+
+	num_short_term_ref_pic_sets uint8
+	st_ref_pic_set              []H265RawSTRefPicSet //[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]H265RawSTRefPicSet
+
+	long_term_ref_pics_present_flag uint8
+	num_long_term_ref_pics_sps      uint8
+	lt_ref_pic_poc_lsb_sps          [HEVC_MAX_LONG_TERM_REF_PICS]uint16
+	used_by_curr_pic_lt_sps_flag    [HEVC_MAX_LONG_TERM_REF_PICS]uint8
+
+	sps_temporal_mvp_enabled_flag       uint8
+	strong_intra_smoothing_enabled_flag uint8
+
+	vui_parameters_present_flag uint8
+	vui                         H265RawVUI
+
+	sps_extension_present_flag    uint8
+	sps_range_extension_flag      uint8
+	sps_multilayer_extension_flag uint8
+	sps_3d_extension_flag         uint8
+	sps_scc_extension_flag        uint8
+	sps_extension_4bits           uint8
+
+	// extension_data H265RawExtensionData
+
+	// // Range extension.
+	// transform_skip_rotation_enabled_flag    uint8
+	// transform_skip_context_enabled_flag     uint8
+	// implicit_rdpcm_enabled_flag             uint8
+	// explicit_rdpcm_enabled_flag             uint8
+	// extended_precision_processing_flag      uint8
+	// intra_smoothing_disabled_flag           uint8
+	// high_precision_offsets_enabled_flag     uint8
+	// persistent_rice_adaptation_enabled_flag uint8
+	// cabac_bypass_alignment_enabled_flag     uint8
+
+	// // Screen content coding extension.
+	// sps_curr_pic_ref_enabled_flag                  uint8
+	// palette_mode_enabled_flag                      uint8
+	// palette_max_size                               uint8
+	// delta_palette_max_predictor_size               uint8
+	// sps_palette_predictor_initializer_present_flag uint8
+	// sps_num_palette_predictor_initializer_minus1   uint8
+	// sps_palette_predictor_initializers             [3][128]uint16
+
+	// motion_vector_resolution_control_idc  uint8
+	// intra_boundary_filtering_disable_flag uint8
+}
+
+// Width returns pic_width_in_luma_samples, the coded picture width in
+// pixels. The conformance window offsets are not applied.
+func (sps *H265RawSPS) Width() int {
+	return int(sps.pic_width_in_luma_samples)
+}
+
+// Height returns pic_height_in_luma_samples, the coded picture height in
+// pixels. The conformance window offsets are not applied.
+func (sps *H265RawSPS) Height() int {
+	return int(sps.pic_height_in_luma_samples)
+}
+
+// FrameRate returns vui_time_scale divided by vui_num_units_in_tick, or 0
+// when vui_num_units_in_tick is 0.
+func (sps *H265RawSPS) FrameRate() float64 {
+	if sps.vui.vui_num_units_in_tick == 0 {
+		return 0.0
+	}
+	return float64(sps.vui.vui_time_scale) / float64(sps.vui.vui_num_units_in_tick)
+}
+
+// IsFixedFrameRate reports whether the frame rate is fixed. It is not
+// derived from the stream yet and always returns true.
+func (sps *H265RawSPS) IsFixedFrameRate() bool {
+	// TODO:
+	return true
+}
+
+// DecodeString decodes an SPS NAL unit from its standard base64 encoding.
+func (sps *H265RawSPS) DecodeString(b64 string) error {
+	data, err := base64.StdEncoding.DecodeString(b64)
+	if err != nil {
+		return err
+	}
+	return sps.Decode(data)
+}
+
+// Decode decodes an SPS NAL unit, including its NAL unit header, from data.
+// Emulation prevention bytes are removed first. An error is returned when
+// the data is too short, is not an SPS NAL unit, carries values that do not
+// fit the fixed-size tables, or when a nested decoder fails. A panic raised
+// while reading is recovered and reported as an error as well. sps may be
+// partially filled in when an error is returned.
+func (sps *H265RawSPS) Decode(data []byte) (err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			err = fmt.Errorf("RawSPS decode panic;r = %v \n %s", r, debug.Stack())
+		}
+	}()
+
+	spsWEB := utils.RemoveH264or5EmulationBytes(data)
+	if len(spsWEB) < 4 {
+		return errors.New("The data is not enough")
+	}
+
+	r := bits.NewReader(spsWEB)
+	if err = sps.nal_unit_header.decode(r); err != nil {
+		return err
+	}
+
+	if sps.nal_unit_header.nal_unit_type != NalSps {
+		return errors.New("not is sps NAL UNIT")
+	}
+
+	sps.sps_video_parameter_set_id = r.ReadUint8(4)
+
+	sps.sps_max_sub_layers_minus1 = r.ReadUint8(3)
+	// The 3-bit field can hold 7, but the per-sub-layer arrays have
+	// HEVC_MAX_SUB_LAYERS entries and are indexed up to this value.
+	if sps.sps_max_sub_layers_minus1 >= HEVC_MAX_SUB_LAYERS {
+		return fmt.Errorf("invalid sps_max_sub_layers_minus1: %d", sps.sps_max_sub_layers_minus1)
+	}
+	sps.sps_temporal_id_nesting_flag = r.ReadBit()
+	if err = sps.profile_tier_level.decode(r, true, int(sps.sps_max_sub_layers_minus1)); err != nil {
+		return err
+	}
+
+	sps.sps_seq_parameter_set_id = r.ReadUe8()
+
+	sps.chroma_format_idc = r.ReadUe8()
+	if sps.chroma_format_idc == 3 {
+		sps.separate_colour_plane_flag = r.ReadBit()
+	}
+
+	sps.pic_width_in_luma_samples = r.ReadUe16()
+	sps.pic_height_in_luma_samples = r.ReadUe16()
+
+	sps.conformance_window_flag = r.ReadBit()
+	if sps.conformance_window_flag == 1 {
+		sps.conf_win_left_offset = r.ReadUe16()
+		sps.conf_win_right_offset = r.ReadUe16()
+		sps.conf_win_top_offset = r.ReadUe16()
+		sps.conf_win_bottom_offset = r.ReadUe16()
+	}
+
+	sps.bit_depth_luma_minus8 = r.ReadUe8()
+	sps.bit_depth_chroma_minus8 = r.ReadUe8()
+
+	sps.log2_max_pic_order_cnt_lsb_minus4 = r.ReadUe8()
+
+	sps.sps_sub_layer_ordering_info_present_flag = r.ReadBit()
+	// With the flag set every sub-layer carries its own values, so reading
+	// starts at 0. Without it only the highest sub-layer is coded and its
+	// values are copied to the lower ones below.
+	loopStart := sps.sps_max_sub_layers_minus1
+	if sps.sps_sub_layer_ordering_info_present_flag == 1 {
+		loopStart = 0
+	}
+	for i := loopStart; i <= sps.sps_max_sub_layers_minus1; i++ {
+		sps.sps_max_dec_pic_buffering_minus1[i] = r.ReadUe8()
+		sps.sps_max_num_reorder_pics[i] = r.ReadUe8()
+		sps.sps_max_latency_increase_plus1[i] = r.ReadUe()
+	}
+
+	if sps.sps_sub_layer_ordering_info_present_flag == 0 {
+		for i := uint8(0); i < sps.sps_max_sub_layers_minus1; i++ {
+			sps.sps_max_dec_pic_buffering_minus1[i] =
+				sps.sps_max_dec_pic_buffering_minus1[sps.sps_max_sub_layers_minus1]
+			sps.sps_max_num_reorder_pics[i] =
+				sps.sps_max_num_reorder_pics[sps.sps_max_sub_layers_minus1]
+			sps.sps_max_latency_increase_plus1[i] =
+				sps.sps_max_latency_increase_plus1[sps.sps_max_sub_layers_minus1]
+		}
+	}
+
+	sps.log2_min_luma_coding_block_size_minus3 = r.ReadUe8()
+	min_cb_log2_size_y := sps.log2_min_luma_coding_block_size_minus3 + 3
+	// A shift of 16 or more makes the uint16 block size zero and the
+	// modulo below would divide by zero.
+	if min_cb_log2_size_y >= 16 {
+		return fmt.Errorf("invalid log2_min_luma_coding_block_size_minus3: %d", sps.log2_min_luma_coding_block_size_minus3)
+	}
+
+	sps.log2_diff_max_min_luma_coding_block_size = r.ReadUe8()
+	// ctb_log2_size_y := min_cb_log2_size_y +
+	// 	sps.log2_diff_max_min_luma_coding_block_size
+
+	min_cb_size_y := uint16(1) << min_cb_log2_size_y
+	if (sps.pic_width_in_luma_samples%min_cb_size_y) > 0 ||
+		(sps.pic_height_in_luma_samples%min_cb_size_y) > 0 {
+		return fmt.Errorf("Invalid dimensions: %vx%v not divisible by MinCbSizeY = %v.\n",
+			sps.pic_width_in_luma_samples,
+			sps.pic_height_in_luma_samples,
+			min_cb_size_y)
+	}
+
+	sps.log2_min_luma_transform_block_size_minus2 = r.ReadUe8()
+	// min_tb_log2_size_y := sps.log2_min_luma_transform_block_size_minus2 + 2
+
+	sps.log2_diff_max_min_luma_transform_block_size = r.ReadUe8()
+
+	sps.max_transform_hierarchy_depth_inter = r.ReadUe8()
+	sps.max_transform_hierarchy_depth_intra = r.ReadUe8()
+
+	sps.scaling_list_enabled_flag = r.ReadBit()
+	if sps.scaling_list_enabled_flag == 1 {
+		sps.sps_scaling_list_data_present_flag = r.ReadBit()
+		if sps.sps_scaling_list_data_present_flag == 1 {
+			sps.scaling_list = new(H265RawScalingList)
+			if err = sps.scaling_list.decode(r); err != nil {
+				return err
+			}
+		}
+	}
+
+	sps.amp_enabled_flag = r.ReadBit()
+	sps.sample_adaptive_offset_enabled_flag = r.ReadBit()
+
+	sps.pcm_enabled_flag = r.ReadBit()
+	if sps.pcm_enabled_flag == 1 {
+		sps.pcm_sample_bit_depth_luma_minus1 = r.ReadUint8(4)
+		sps.pcm_sample_bit_depth_chroma_minus1 = r.ReadUint8(4)
+
+		sps.log2_min_pcm_luma_coding_block_size_minus3 = r.ReadUe8()
+		sps.log2_diff_max_min_pcm_luma_coding_block_size = r.ReadUe8()
+
+		sps.pcm_loop_filter_disabled_flag = r.ReadBit()
+	}
+
+	sps.num_short_term_ref_pic_sets = r.ReadUe8()
+	if sps.num_short_term_ref_pic_sets > 0 {
+		sps.st_ref_pic_set = make([]H265RawSTRefPicSet, sps.num_short_term_ref_pic_sets)
+		for i := uint8(0); i < sps.num_short_term_ref_pic_sets; i++ {
+			// A failed set leaves the reader at an unknown position, so
+			// stop instead of parsing the rest from the wrong bit.
+			if err = sps.st_ref_pic_set[i].decode(r, i, sps); err != nil {
+				return err
+			}
+		}
+	}
+
+	sps.long_term_ref_pics_present_flag = r.ReadBit()
+	if sps.long_term_ref_pics_present_flag == 1 {
+		sps.num_long_term_ref_pics_sps = r.ReadUe8()
+		if sps.num_long_term_ref_pics_sps > HEVC_MAX_LONG_TERM_REF_PICS {
+			return fmt.Errorf("invalid num_long_term_ref_pics_sps: %d", sps.num_long_term_ref_pics_sps)
+		}
+		for i := uint8(0); i < sps.num_long_term_ref_pics_sps; i++ {
+			sps.lt_ref_pic_poc_lsb_sps[i] = r.ReadUint16(int(sps.log2_max_pic_order_cnt_lsb_minus4 + 4))
+			sps.used_by_curr_pic_lt_sps_flag[i] = r.ReadBit()
+		}
+	}
+
+	sps.sps_temporal_mvp_enabled_flag = r.ReadBit()
+	sps.strong_intra_smoothing_enabled_flag = r.ReadBit()
+
+	sps.vui_parameters_present_flag = r.ReadBit()
+	if sps.vui_parameters_present_flag == 1 {
+		if err = sps.vui.decode(r, sps); err != nil {
+			return err
+		}
+	} else {
+		sps.vui.setDefault(sps)
+	}
+
+	sps.sps_extension_present_flag = r.ReadBit()
+
+	if sps.sps_extension_present_flag == 1 {
+		sps.sps_range_extension_flag = r.ReadBit()
+		sps.sps_multilayer_extension_flag = r.ReadBit()
+		sps.sps_3d_extension_flag = r.ReadBit()
+		sps.sps_scc_extension_flag = r.ReadBit()
+		sps.sps_extension_4bits = r.ReadUint8(4)
+	}
+
+	// if (sps.sps_range_extension_flag)
+	//     CHECK(FUNC(sps_range_extension)(ctx, rw, current));
+	// if (sps.sps_multilayer_extension_flag)
+	//     return AVERROR_PATCHWELCOME;
+	// if (sps.sps_3d_extension_flag)
+	//     return AVERROR_PATCHWELCOME;
+	// if (sps.sps_scc_extension_flag)
+	//     CHECK(FUNC(sps_scc_extension)(ctx, rw, current));
+	// if (sps.sps_extension_4bits)
+	//     CHECK(FUNC(extension_data)(ctx, rw, &sps.extension_data));
+
+	// CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+	return nil
+}
diff --git a/av/codec/hevc/sps_test.go b/av/codec/hevc/sps_test.go
new file mode 100644
index 0000000..f0e4019
--- /dev/null
+++ b/av/codec/hevc/sps_test.go
@@ -0,0 +1,66 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package hevc
+
+import (
+	"testing"
+)
+
+// TestH265RawSPS_DecodeString decodes base64 SPS samples and compares the
+// reported width, height and frame rate with the expected values.
+func TestH265RawSPS_DecodeString(t *testing.T) {
+	type sample struct {
+		name    string
+		b64     string
+		wantW   int
+		wantH   int
+		wantFR  float64
+		wantErr bool
+	}
+	samples := []sample{
+		{
+			name:   "base64_1",
+			b64:    "QgEBAWAAAAMAkAAAAwAAAwBdoAKAgC0WWVmkkyuAQAAA+kAAF3AC",
+			wantW:  1280,
+			wantH:  720,
+			wantFR: float64(24000) / float64(1001),
+		},
+		{
+			name:   "base64_2",
+			b64:    "QgEBBAgAAAMAnQgAAAMAAF2wAoCALRZZWaSTK4BAAAADAEAAAAeC",
+			wantW:  1280,
+			wantH:  720,
+			wantFR: 30,
+		},
+	}
+	for _, s := range samples {
+		t.Run(s.name, func(t *testing.T) {
+			var sps H265RawSPS
+			err := sps.DecodeString(s.b64)
+			if failed := err != nil; failed != s.wantErr {
+				t.Errorf("RawSPS.Parse() error = %v, wantErr %v", err, s.wantErr)
+			}
+			if got := sps.Width(); got != s.wantW {
+				t.Errorf("RawSPS.Parse() Width = %v, wantWidth %v", got, s.wantW)
+			}
+			if got := sps.Height(); got != s.wantH {
+				t.Errorf("RawSPS.Parse() Height = %v, wantHeight %v", got, s.wantH)
+			}
+			if got := sps.FrameRate(); got != s.wantFR {
+				t.Errorf("RawSPS.Parse() FrameRate = %v, wantFrameRate %v", got, s.wantFR)
+			}
+		})
+	}
+}
+
+// Benchmark_SPSDecode measures DecodeString on a fixed base64 input, run in
+// parallel with a fresh H265RawSPS per iteration.
+func Benchmark_SPSDecode(b *testing.B) {
+	const encoded = "QgEBAWAAAAMAkAAAAwAAAwBdoAKAgC0WWVmkkyuAQAAA+kAAF3ACQgEBAWAAAAMAkAAAAwAAAwBdoAKAgC0WWVmkkyuAQAAA+kAAF3AC"
+
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			var sps H265RawSPS
+			_ = sps.DecodeString(encoded)
+		}
+	})
+}
diff --git a/av/codec/hevc/vps.go b/av/codec/hevc/vps.go
new file mode 100644
index 0000000..67cf358
--- /dev/null
+++ b/av/codec/hevc/vps.go
@@ -0,0 +1,493 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+// +// Translate from FFmpeg cbs_h265.h cbs_h265_syntax_template.c +// +package hevc + +import ( + "encoding/base64" + "errors" + "fmt" + "runtime/debug" + + "github.com/cnotch/ipchub/utils" + "github.com/cnotch/ipchub/utils/bits" +) + +type H265RawNALUnitHeader struct { + nal_unit_type uint8 + nuh_layer_id uint8 + nuh_temporal_id_plus1 uint8 +} + +func (h *H265RawNALUnitHeader) decode(r *bits.Reader) (err error) { + r.Skip(1) //forbidden_zero_bit + h.nal_unit_type = r.ReadUint8(6) + h.nuh_layer_id = r.ReadUint8(6) + h.nuh_temporal_id_plus1 = r.ReadUint8(3) + return +} + +type H265RawProfileTierLevel struct { + general_profile_space uint8 + general_tier_flag uint8 + general_profile_idc uint8 + + general_profile_compatibility_flag [32]uint8 + + general_progressive_source_flag uint8 + general_interlaced_source_flag uint8 + general_non_packed_constraint_flag uint8 + general_frame_only_constraint_flag uint8 + + general_max_12bit_constraint_flag uint8 + general_max_10bit_constraint_flag uint8 + general_max_8bit_constraint_flag uint8 + general_max_422chroma_constraint_flag uint8 + general_max_420chroma_constraint_flag uint8 + general_max_monochrome_constraint_flag uint8 + general_intra_constraint_flag uint8 + general_one_picture_only_constraint_flag uint8 + general_lower_bit_rate_constraint_flag uint8 + general_max_14bit_constraint_flag uint8 + + general_inbld_flag uint8 + + general_level_idc uint8 + + sub_layer_profile_present_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_level_present_flag [HEVC_MAX_SUB_LAYERS]uint8 + + sub_layer_profile_space [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_tier_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_profile_idc [HEVC_MAX_SUB_LAYERS]uint8 + + sub_layer_profile_compatibility_flag [HEVC_MAX_SUB_LAYERS][32]uint8 + + sub_layer_progressive_source_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_interlaced_source_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_non_packed_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_frame_only_constraint_flag 
[HEVC_MAX_SUB_LAYERS]uint8 + + sub_layer_max_12bit_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_10bit_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_8bit_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_422chroma_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_420chroma_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_monochrome_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_intra_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_one_picture_only_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_lower_bit_rate_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + sub_layer_max_14bit_constraint_flag [HEVC_MAX_SUB_LAYERS]uint8 + + sub_layer_inbld_flag [HEVC_MAX_SUB_LAYERS]uint8 + + sub_layer_level_idc [HEVC_MAX_SUB_LAYERS]uint8 +} +type profile_compatible struct { + profile_idc uint8 + profile_compatibility_flag [32]uint8 +} + +func (pc profile_compatible) compatible(idc uint8) bool { + return pc.profile_idc == idc || pc.profile_compatibility_flag[idc] == 1 +} + +func (ptl *H265RawProfileTierLevel) decode(r *bits.Reader, + profile_present_flag bool, max_num_sub_layers_minus1 int) (err error) { + + if profile_present_flag { + ptl.general_profile_space = r.ReadUint8(2) + ptl.general_tier_flag = r.ReadBit() + ptl.general_profile_idc = r.ReadUint8(5) + + for j := 0; j < 32; j++ { + ptl.general_profile_compatibility_flag[j] = r.ReadBit() + } + + ptl.general_progressive_source_flag = r.ReadBit() + ptl.general_interlaced_source_flag = r.ReadBit() + ptl.general_non_packed_constraint_flag = r.ReadBit() + ptl.general_frame_only_constraint_flag = r.ReadBit() + + pc := profile_compatible{ptl.general_profile_idc, ptl.general_profile_compatibility_flag} + if pc.compatible(4) || pc.compatible(5) || + pc.compatible(6) || pc.compatible(7) || + pc.compatible(8) || pc.compatible(9) || + pc.compatible(10) { + ptl.general_max_12bit_constraint_flag = r.ReadBit() + ptl.general_max_10bit_constraint_flag = r.ReadBit() + 
ptl.general_max_8bit_constraint_flag = r.ReadBit() + ptl.general_max_422chroma_constraint_flag = r.ReadBit() + ptl.general_max_420chroma_constraint_flag = r.ReadBit() + ptl.general_max_monochrome_constraint_flag = r.ReadBit() + ptl.general_intra_constraint_flag = r.ReadBit() + ptl.general_one_picture_only_constraint_flag = r.ReadBit() + ptl.general_lower_bit_rate_constraint_flag = r.ReadBit() + + if pc.compatible(5) || pc.compatible(9) || pc.compatible(10) { + ptl.general_max_14bit_constraint_flag = r.ReadBit() + r.Skip(33) // general_reserved_zero_33bits + + } else { + r.Skip(34) //general_reserved_zero_34bits + } + } else if pc.compatible(2) { + r.Skip(7) // general_reserved_zero_7bits + ptl.general_one_picture_only_constraint_flag = r.ReadBit() + r.Skip(35) // general_reserved_zero_35bits + } else { + r.Skip(43) // general_reserved_zero_43bits + } + + if pc.compatible(1) || pc.compatible(2) || + pc.compatible(3) || pc.compatible(4) || + pc.compatible(5) || pc.compatible(9) { + ptl.general_inbld_flag = r.ReadBit() + } else { + r.Skip(1) // general_reserved_zero_bit + } + } + + ptl.general_level_idc = r.ReadUint8(8) + + for i := 0; i < max_num_sub_layers_minus1; i++ { + ptl.sub_layer_profile_present_flag[i] = r.ReadBit() + ptl.sub_layer_level_present_flag[i] = r.ReadBit() + } + + if max_num_sub_layers_minus1 > 0 { + for i := max_num_sub_layers_minus1; i < 8; i++ { + r.Skip(2) // reserved_zero_2bits + } + } + + for i := 0; i < max_num_sub_layers_minus1; i++ { + if ptl.sub_layer_profile_present_flag[i] == 1 { + ptl.sub_layer_profile_space[i] = r.ReadUint8(2) + ptl.sub_layer_tier_flag[i] = r.ReadBit() + ptl.sub_layer_profile_idc[i] = r.ReadUint8(5) + + for j := 0; j < 32; j++ { + ptl.sub_layer_profile_compatibility_flag[i][j] = r.ReadBit() + } + + ptl.sub_layer_progressive_source_flag[i] = r.ReadBit() + ptl.sub_layer_interlaced_source_flag[i] = r.ReadBit() + ptl.sub_layer_non_packed_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_frame_only_constraint_flag[i] = 
r.ReadBit() + + pc := profile_compatible{ptl.sub_layer_profile_idc[i], ptl.sub_layer_profile_compatibility_flag[i]} + if pc.compatible(4) || pc.compatible(5) || + pc.compatible(6) || pc.compatible(7) || + pc.compatible(8) || pc.compatible(9) || + pc.compatible(10) { + ptl.sub_layer_max_12bit_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_max_10bit_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_max_8bit_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_max_422chroma_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_max_420chroma_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_max_monochrome_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_intra_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_one_picture_only_constraint_flag[i] = r.ReadBit() + ptl.sub_layer_lower_bit_rate_constraint_flag[i] = r.ReadBit() + + if pc.compatible(5) { + ptl.sub_layer_max_14bit_constraint_flag[i] = r.ReadBit() + r.Skip(33) // sub_layer_reserved_zero_33bits + } else { + r.Skip(34) // sub_layer_reserved_zero_34bits + } + } else if pc.compatible(2) { + r.Skip(7) // sub_layer_reserved_zero_7bits + ptl.sub_layer_one_picture_only_constraint_flag[i] = r.ReadBit() + r.Skip(35) // sub_layer_reserved_zero_35bits + } else { + r.Skip(43) // sub_layer_reserved_zero_43bits + } + + if pc.compatible(1) || pc.compatible(2) || + pc.compatible(3) || pc.compatible(4) || + pc.compatible(5) || pc.compatible(9) { + ptl.sub_layer_inbld_flag[i] = r.ReadBit() + } else { + r.Skip(1) // sub_layer_reserved_zero_bit + } + } + if ptl.sub_layer_level_present_flag[i] == 1 { + ptl.sub_layer_level_idc[i] = r.ReadUint8(8) + } + } + return +} + +type H265RawSubLayerHRDParameters struct { + bit_rate_value_minus1 [HEVC_MAX_CPB_CNT]uint32 + cpb_size_value_minus1 [HEVC_MAX_CPB_CNT]uint32 + cpb_size_du_value_minus1 [HEVC_MAX_CPB_CNT]uint32 + bit_rate_du_value_minus1 [HEVC_MAX_CPB_CNT]uint32 + cbr_flag [HEVC_MAX_CPB_CNT]uint8 +} + +func (shrd *H265RawSubLayerHRDParameters) decode(r *bits.Reader, + 
sub_pic_hrd_params_present_flag bool, cpb_cnt_minus1 int) (err error) { + for i := 0; i <= cpb_cnt_minus1; i++ { + shrd.bit_rate_value_minus1[i] = r.ReadUe() + shrd.cpb_size_value_minus1[i] = r.ReadUe() + if sub_pic_hrd_params_present_flag { + shrd.cpb_size_du_value_minus1[i] = r.ReadUe() + shrd.bit_rate_du_value_minus1[i] = r.ReadUe() + } + shrd.cbr_flag[i] = r.ReadBit() + } + return +} + +type H265RawHRDParameters struct { + nal_hrd_parameters_present_flag uint8 + vcl_hrd_parameters_present_flag uint8 + + sub_pic_hrd_params_present_flag uint8 + tick_divisor_minus2 uint8 + du_cpb_removal_delay_increment_length_minus1 uint8 + sub_pic_cpb_params_in_pic_timing_sei_flag uint8 + dpb_output_delay_du_length_minus1 uint8 + + bit_rate_scale uint8 + cpb_size_scale uint8 + cpb_size_du_scale uint8 + + initial_cpb_removal_delay_length_minus1 uint8 + au_cpb_removal_delay_length_minus1 uint8 + dpb_output_delay_length_minus1 uint8 + + fixed_pic_rate_general_flag [HEVC_MAX_SUB_LAYERS]uint8 + fixed_pic_rate_within_cvs_flag [HEVC_MAX_SUB_LAYERS]uint8 + elemental_duration_in_tc_minus1 [HEVC_MAX_SUB_LAYERS]uint16 + low_delay_hrd_flag [HEVC_MAX_SUB_LAYERS]uint8 + cpb_cnt_minus1 [HEVC_MAX_SUB_LAYERS]uint8 + nal_sub_layer_hrd_parameters [HEVC_MAX_SUB_LAYERS]H265RawSubLayerHRDParameters + vcl_sub_layer_hrd_parameters [HEVC_MAX_SUB_LAYERS]H265RawSubLayerHRDParameters +} + +func (hrd *H265RawHRDParameters) decode(r *bits.Reader, + common_inf_present_flag bool, max_num_sub_layers_minus1 int) (err error) { + if common_inf_present_flag { + hrd.nal_hrd_parameters_present_flag = r.ReadBit() + hrd.vcl_hrd_parameters_present_flag = r.ReadBit() + + if hrd.nal_hrd_parameters_present_flag == 1 || + hrd.vcl_hrd_parameters_present_flag == 1 { + hrd.sub_pic_hrd_params_present_flag = r.ReadBit() + if hrd.sub_pic_hrd_params_present_flag == 1 { + hrd.tick_divisor_minus2 = r.ReadUint8(8) + hrd.du_cpb_removal_delay_increment_length_minus1 = r.ReadUint8(5) + hrd.sub_pic_cpb_params_in_pic_timing_sei_flag = 
r.ReadBit() + hrd.dpb_output_delay_du_length_minus1 = r.ReadUint8(5) + } + + hrd.bit_rate_scale = r.ReadUint8(4) + hrd.cpb_size_scale = r.ReadUint8(4) + if hrd.sub_pic_hrd_params_present_flag == 1 { + hrd.cpb_size_du_scale = r.ReadUint8(4) + + } + + hrd.initial_cpb_removal_delay_length_minus1 = r.ReadUint8(5) + hrd.au_cpb_removal_delay_length_minus1 = r.ReadUint8(5) + hrd.dpb_output_delay_length_minus1 = r.ReadUint8(5) + } else { + hrd.sub_pic_hrd_params_present_flag = 0 + + hrd.initial_cpb_removal_delay_length_minus1 = 23 + hrd.au_cpb_removal_delay_length_minus1 = 23 + hrd.dpb_output_delay_length_minus1 = 23 + } + } + + for i := 0; i <= max_num_sub_layers_minus1; i++ { + hrd.fixed_pic_rate_general_flag[i] = r.ReadBit() + + hrd.fixed_pic_rate_within_cvs_flag[i] = 1 + if hrd.fixed_pic_rate_general_flag[i] == 0 { + hrd.fixed_pic_rate_within_cvs_flag[i] = r.ReadBit() + } + + if hrd.fixed_pic_rate_within_cvs_flag[i] == 1 { + hrd.elemental_duration_in_tc_minus1[i] = r.ReadUe16() + hrd.low_delay_hrd_flag[i] = 0 + } else { + hrd.low_delay_hrd_flag[i] = r.ReadBit() + } + + hrd.cpb_cnt_minus1[i] = 0 + if hrd.low_delay_hrd_flag[i] == 0 { + hrd.cpb_cnt_minus1[i] = r.ReadUe8() + } + + if hrd.nal_hrd_parameters_present_flag == 1 { + hrd.nal_sub_layer_hrd_parameters[i].decode(r, hrd.sub_pic_hrd_params_present_flag == 1, int(hrd.cpb_cnt_minus1[i])) + } + if hrd.vcl_hrd_parameters_present_flag == 1 { + hrd.vcl_sub_layer_hrd_parameters[i].decode(r, hrd.sub_pic_hrd_params_present_flag == 1, int(hrd.cpb_cnt_minus1[i])) + } + } + + return +} + +type H265RawVPS struct { + nal_unit_header H265RawNALUnitHeader + + vps_video_parameter_set_id uint8 + + vps_base_layer_internal_flag uint8 + vps_base_layer_available_flag uint8 + vps_max_layers_minus1 uint8 + vps_max_sub_layers_minus1 uint8 + vps_temporal_id_nesting_flag uint8 + + profile_tier_level H265RawProfileTierLevel + + vps_sub_layer_ordering_info_present_flag uint8 + vps_max_dec_pic_buffering_minus1 [HEVC_MAX_SUB_LAYERS]uint8 + 
vps_max_num_reorder_pics [HEVC_MAX_SUB_LAYERS]uint8 + vps_max_latency_increase_plus1 [HEVC_MAX_SUB_LAYERS]uint32 + + vps_max_layer_id uint8 + vps_num_layer_sets_minus1 uint16 + layer_id_included_flag [][HEVC_MAX_LAYERS]uint8 //[HEVC_MAX_LAYER_SETS][HEVC_MAX_LAYERS]uint8 + + vps_timing_info_present_flag uint8 + vps_num_units_in_tick uint32 + vps_time_scale uint32 + vps_poc_proportional_to_timing_flag uint8 + vps_num_ticks_poc_diff_one_minus1 uint32 + vps_num_hrd_parameters uint16 + hrd_layer_set_idx []uint16 //[HEVC_MAX_LAYER_SETS]uint16 + cprms_present_flag []uint8 //[HEVC_MAX_LAYER_SETS]uint8 + hrd_parameters []H265RawHRDParameters //[HEVC_MAX_LAYER_SETS]H265RawHRDParameters + + vps_extension_flag uint8 + // extension_data H265RawExtensionData +} + +// DecodeString 从 base64 字串解码 vps NAL +func (vps *H265RawVPS) DecodeString(b64 string) error { + data, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return err + } + return vps.Decode(data) +} + +// Decode 从字节序列中解码 vps NAL +func (vps *H265RawVPS) Decode(data []byte) (err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("RawVPS decode panic;r = %v \n %s", r, debug.Stack()) + } + }() + + vpsWEB := utils.RemoveH264or5EmulationBytes(data) + if len(vpsWEB) < 4 { + return errors.New("The data is not enough") + } + + r := bits.NewReader(vpsWEB) + if err = vps.nal_unit_header.decode(r); err != nil { + return + } + + if vps.nal_unit_header.nal_unit_type != NalVps { + return errors.New("not is vps NAL UNIT") + } + + vps.vps_video_parameter_set_id = r.ReadUint8(4) + + vps.vps_base_layer_internal_flag = r.ReadBit() + vps.vps_base_layer_available_flag = r.ReadBit() + vps.vps_max_layers_minus1 = r.ReadUint8(6) + vps.vps_max_sub_layers_minus1 = r.ReadUint8(3) + vps.vps_temporal_id_nesting_flag = r.ReadBit() + + if vps.vps_max_sub_layers_minus1 == 0 && + vps.vps_temporal_id_nesting_flag != 1 { + return errors.New("Invalid stream: vps_temporal_id_nesting_flag must be 1 if 
vps_max_sub_layers_minus1 is 0.\n") + } + + r.Skip(16) // vps_reserved_0xffff_16bits + if err = vps.profile_tier_level.decode(r, true, int(vps.vps_max_sub_layers_minus1)); err != nil { + return + } + + vps.vps_sub_layer_ordering_info_present_flag = r.ReadBit() + i := vps.vps_max_sub_layers_minus1 + if vps.vps_sub_layer_ordering_info_present_flag == 1 { + i = 0 + } + for ; i <= vps.vps_max_sub_layers_minus1; i++ { + vps.vps_max_dec_pic_buffering_minus1[i] = r.ReadUe8() + vps.vps_max_num_reorder_pics[i] = r.ReadUe8() + vps.vps_max_latency_increase_plus1[i] = r.ReadUe() + } + if vps.vps_sub_layer_ordering_info_present_flag == 0 { + for i := uint8(0); i < vps.vps_max_sub_layers_minus1; i++ { + vps.vps_max_dec_pic_buffering_minus1[i] = + vps.vps_max_dec_pic_buffering_minus1[vps.vps_max_sub_layers_minus1] + vps.vps_max_num_reorder_pics[i] = + vps.vps_max_num_reorder_pics[vps.vps_max_sub_layers_minus1] + vps.vps_max_latency_increase_plus1[i] = + vps.vps_max_latency_increase_plus1[vps.vps_max_sub_layers_minus1] + } + } + + vps.vps_max_layer_id = r.ReadUint8(6) + vps.vps_num_layer_sets_minus1 = r.ReadUe16() + vps.layer_id_included_flag = make([][HEVC_MAX_LAYERS]uint8, vps.vps_num_layer_sets_minus1+1) + for i := uint16(1); i <= vps.vps_num_layer_sets_minus1; i++ { + for j := uint8(0); j <= vps.vps_max_layer_id; j++ { + vps.layer_id_included_flag[i][j] = r.ReadBit() + } + } + for j := uint8(0); j <= vps.vps_max_layer_id; j++ { + vps.layer_id_included_flag[0][j] = 1 + if j == 0 { + vps.layer_id_included_flag[0][j] = 0 + } + } + vps.vps_timing_info_present_flag = r.ReadBit() + if vps.vps_timing_info_present_flag == 1 { + vps.vps_num_units_in_tick = r.ReadUint32(32) + vps.vps_time_scale = r.ReadUint32(32) + vps.vps_poc_proportional_to_timing_flag = r.ReadBit() + if vps.vps_poc_proportional_to_timing_flag == 1 { + vps.vps_num_ticks_poc_diff_one_minus1 = r.ReadUe() + } + + vps.vps_num_hrd_parameters = r.ReadUe16() + if vps.vps_num_hrd_parameters > 0 { + vps.hrd_layer_set_idx = 
make([]uint16, vps.vps_num_hrd_parameters) + vps.cprms_present_flag = make([]uint8, vps.vps_num_hrd_parameters) + vps.hrd_parameters = make([]H265RawHRDParameters, vps.vps_num_hrd_parameters) + } + for i := uint16(0); i < vps.vps_num_hrd_parameters; i++ { + vps.hrd_layer_set_idx[i] = r.ReadUe16() + if i > 0 { + vps.cprms_present_flag[i] = r.ReadBit() + } else { + vps.cprms_present_flag[0] = 1 + } + if err = vps.hrd_parameters[i].decode(r, + vps.cprms_present_flag[i] == 1, + int(vps.vps_max_sub_layers_minus1)); err != nil { + return + } + } + } + + vps.vps_extension_flag = r.ReadBit() + return +} diff --git a/av/codec/hevc/vps_test.go b/av/codec/hevc/vps_test.go new file mode 100644 index 0000000..802fbf7 --- /dev/null +++ b/av/codec/hevc/vps_test.go @@ -0,0 +1,48 @@ +// Copyright (c) 2019,CAOHONGJU All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package hevc + +import ( + "testing" +) + +func TestH265RawVPS_DecodeString(t *testing.T) { + tests := []struct { + name string + b64 string + wantErr bool + }{ + { + "base64_1", + "QAEMAf//BAgAAAMAnQgAAAMAAF2VmAk=", + false, + }, + { + "base64_2", + "QAEMAf//AWAAAAMAkAAAAwAAAwBdlZgJ", + false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vps := &H265RawVPS{} + if err := vps.DecodeString(tt.b64); (err != nil) != tt.wantErr { + t.Errorf("RawVPS.Parse() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func Benchmark_VPSDecode(b *testing.B) { + vpsstr := "QAEMAf//AWAAAAMAkAAAAwAAAwBdlZgJ" + + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + vps := &H265RawVPS{} + _ = vps.DecodeString(vpsstr) + } + }) +} diff --git a/av/format/sdp/parsemeta.go b/av/format/sdp/parsemeta.go index 52c114a..fce74ff 100644 --- a/av/format/sdp/parsemeta.go +++ b/av/format/sdp/parsemeta.go @@ -12,6 +12,7 @@ import ( "github.com/cnotch/ipchub/av/codec" "github.com/cnotch/ipchub/av/codec/aac" 
"github.com/cnotch/ipchub/av/codec/h264" + "github.com/cnotch/ipchub/av/codec/hevc" "github.com/cnotch/ipchub/utils/scan" "github.com/pixelbender/go-sdp/sdp" ) @@ -126,7 +127,14 @@ func parseVideoMeta(m *sdp.Format, video *codec.VideoMeta) { } case "h265", "H265", "hevc", "HEVC": video.Codec = "H265" - // TODO: parse H265 vps sps pps + for _, p := range m.Params { + i := strings.Index(p, "sprop-") + if i < 0 { + continue + } + parseH265VpsSpsPps(p[i:], video) + break + } } } @@ -150,3 +158,34 @@ func parseH264SpsPps(s string, video *codec.VideoMeta) { _ = h264.MetadataIsReady(video) } + +func parseH265VpsSpsPps(s string, video *codec.VideoMeta) { + var advance, token string + continueScan := true + advance = s + for continueScan { + advance, token, continueScan = scan.Semicolon.Scan(advance) + name, value, ok := scan.EqualPair.Scan(token) + if ok { + var ps *[]byte + var err error + switch name { + case "sprop-vps": + ps = &video.Vps + case "sprop-sps": + ps = &video.Sps + case "sprop-pps": + ps = &video.Pps + } + if ps == nil { + continue + } + + if *ps, err = base64.StdEncoding.DecodeString(value); err != nil { + return + } + } + } + + _ = hevc.MetadataIsReady(video) +}