[go: nahoru, domu]

blob: 87805b2a818983d533b51f721e35a8891a64f6ff [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of an H265 Annex-B video stream parser.
#ifndef MEDIA_VIDEO_H265_PARSER_H_
#define MEDIA_VIDEO_H265_PARSER_H_
#include <stdint.h>
#include <sys/types.h>
#include <vector>
#include "base/containers/flat_map.h"
#include "base/macros.h"
#include "media/base/media_export.h"
#include "media/base/ranges.h"
#include "media/base/video_color_space.h"
#include "media/video/h264_bit_reader.h"
#include "media/video/h264_parser.h"
namespace media {
struct SubsampleEntry;
// For explanations of each struct and its members, see H.265 specification
// at http://www.itu.int/rec/T-REC-H.265.
struct MEDIA_EXPORT H265NALU {
H265NALU();
// NAL Unit types are taken from Table 7-1 of HEVC/H265 standard
// http://www.itu.int/rec/T-REC-H.265-201410-I/en
enum Type {
TRAIL_N = 0,
TRAIL_R = 1,
TSA_N = 2,
TSA_R = 3,
STSA_N = 4,
STSA_R = 5,
RADL_N = 6,
RADL_R = 7,
RASL_N = 8,
RASL_R = 9,
RSV_VCL_N10 = 10,
RSV_VCL_R11 = 11,
RSV_VCL_N12 = 12,
RSV_VCL_R13 = 13,
RSV_VCL_N14 = 14,
RSV_VCL_R15 = 15,
BLA_W_LP = 16,
BLA_W_RADL = 17,
BLA_N_LP = 18,
IDR_W_RADL = 19,
IDR_N_LP = 20,
CRA_NUT = 21,
RSV_IRAP_VCL22 = 22,
RSV_IRAP_VCL23 = 23,
RSV_VCL24 = 24,
RSV_VCL25 = 25,
RSV_VCL26 = 26,
RSV_VCL27 = 27,
RSV_VCL28 = 28,
RSV_VCL29 = 29,
RSV_VCL30 = 30,
RSV_VCL31 = 31,
VPS_NUT = 32,
SPS_NUT = 33,
PPS_NUT = 34,
AUD_NUT = 35,
EOS_NUT = 36,
EOB_NUT = 37,
FD_NUT = 38,
PREFIX_SEI_NUT = 39,
SUFFIX_SEI_NUT = 40,
RSV_NVCL41 = 41,
RSV_NVCL42 = 42,
RSV_NVCL43 = 43,
RSV_NVCL44 = 44,
RSV_NVCL45 = 45,
RSV_NVCL46 = 46,
RSV_NVCL47 = 47,
UNSPEC48 = 48,
UNSPEC49 = 49,
UNSPEC50 = 50,
UNSPEC51 = 51,
UNSPEC52 = 52,
UNSPEC53 = 53,
UNSPEC54 = 54,
UNSPEC55 = 55,
UNSPEC56 = 56,
UNSPEC57 = 57,
UNSPEC58 = 58,
UNSPEC59 = 59,
UNSPEC60 = 60,
UNSPEC61 = 61,
UNSPEC62 = 62,
UNSPEC63 = 63,
};
// After (without) start code; we don't own the underlying memory
// and a shallow copy should be made when copying this struct.
const uint8_t* data;
off_t size; // From after start code to start code of next NALU (or EOS).
int nal_unit_type;
int nuh_layer_id;
int nuh_temporal_id_plus1;
};
enum {
kMaxLongTermRefPicSets = 32, // 7.4.3.2.1
kMaxShortTermRefPicSets = 64, // 7.4.3.2.1
kMaxSubLayers = 7, // 7.4.3.1 & 7.4.3.2.1 [v|s]ps_max_sub_layers_minus1 + 1
kMaxDpbSize = 16, // A.4.2
kMaxRefIdxActive = 15, // 7.4.7.1 num_ref_idx_l{0,1}_active_minus1 + 1
};
struct MEDIA_EXPORT H265ProfileTierLevel {
H265ProfileTierLevel();
// From Annex A.3.
enum H264ProfileIdc {
kProfileIdcMain = 1,
kProfileIdcMain10 = 2,
kProfileIdcMainStill = 3,
kProfileIdcRangeExtensions = 4,
kProfileIdcHighThroughput = 5,
kProfileIdcScreenContentCoding = 9,
kProfileIdcHighThroughputScreenContentCoding = 11,
};
// Syntax elements.
int general_profile_idc;
int general_level_idc; // 30x the actual level.
// From Table A.8 - General tier and level limits.
int GetMaxLumaPs() const;
// From A.4.2 - Profile-specific level limits for the video profiles.
size_t GetDpbMaxPicBuf() const;
};
struct MEDIA_EXPORT H265ScalingListData {
H265ScalingListData();
enum {
kDefaultScalingListSize0Values = 16, // Table 7-5, all values are 16
kScalingListSizeId0Count = 16, // 7.4.5
kScalingListSizeId1To3Count = 64, // 7.4.5
kNumScalingListMatrices = 6,
};
// TODO(jkardatzke): Optimize storage of the 32x32 since only indices 0 and 3
// are actually used. Also change it in the accelerator delegate if that is
// done.
// Syntax elements.
int scaling_list_dc_coef_16x16[kNumScalingListMatrices];
int scaling_list_dc_coef_32x32[kNumScalingListMatrices];
int scaling_list_4x4[kNumScalingListMatrices][kScalingListSizeId0Count];
int scaling_list_8x8[kNumScalingListMatrices][kScalingListSizeId1To3Count];
int scaling_list_16x16[kNumScalingListMatrices][kScalingListSizeId1To3Count];
int scaling_list_32x32[kNumScalingListMatrices][kScalingListSizeId1To3Count];
};
struct MEDIA_EXPORT H265StRefPicSet {
H265StRefPicSet();
// Syntax elements.
int num_negative_pics;
int num_positive_pics;
int delta_poc_s0[kMaxShortTermRefPicSets];
int used_by_curr_pic_s0[kMaxShortTermRefPicSets];
int delta_poc_s1[kMaxShortTermRefPicSets];
int used_by_curr_pic_s1[kMaxShortTermRefPicSets];
// Calculated fields.
int num_delta_pocs;
};
struct MEDIA_EXPORT H265VUIParameters {
H265VUIParameters();
// Syntax elements.
int sar_width;
int sar_height;
bool video_full_range_flag;
bool colour_description_present_flag;
int colour_primaries;
int transfer_characteristics;
int matrix_coeffs;
int def_disp_win_left_offset;
int def_disp_win_right_offset;
int def_disp_win_top_offset;
int def_disp_win_bottom_offset;
};
struct MEDIA_EXPORT H265SPS {
H265SPS();
// Syntax elements.
int sps_max_sub_layers_minus1;
H265ProfileTierLevel profile_tier_level;
int sps_seq_parameter_set_id;
int chroma_format_idc;
bool separate_colour_plane_flag;
int pic_width_in_luma_samples;
int pic_height_in_luma_samples;
int conf_win_left_offset;
int conf_win_right_offset;
int conf_win_top_offset;
int conf_win_bottom_offset;
int bit_depth_luma_minus8;
int bit_depth_chroma_minus8;
int log2_max_pic_order_cnt_lsb_minus4;
int sps_max_dec_pic_buffering_minus1[kMaxSubLayers];
int sps_max_num_reorder_pics[kMaxSubLayers];
int sps_max_latency_increase_plus1[kMaxSubLayers];
int log2_min_luma_coding_block_size_minus3;
int log2_diff_max_min_luma_coding_block_size;
int log2_min_luma_transform_block_size_minus2;
int log2_diff_max_min_luma_transform_block_size;
int max_transform_hierarchy_depth_inter;
int max_transform_hierarchy_depth_intra;
bool scaling_list_enabled_flag;
bool sps_scaling_list_data_present_flag;
H265ScalingListData scaling_list_data;
bool amp_enabled_flag;
bool sample_adaptive_offset_enabled_flag;
bool pcm_enabled_flag;
int pcm_sample_bit_depth_luma_minus1;
int pcm_sample_bit_depth_chroma_minus1;
int log2_min_pcm_luma_coding_block_size_minus3;
int log2_diff_max_min_pcm_luma_coding_block_size;
bool pcm_loop_filter_disabled_flag;
int num_short_term_ref_pic_sets;
H265StRefPicSet st_ref_pic_set[kMaxShortTermRefPicSets];
bool long_term_ref_pics_present_flag;
int num_long_term_ref_pics_sps;
int lt_ref_pic_poc_lsb_sps[kMaxLongTermRefPicSets];
bool used_by_curr_pic_lt_sps_flag[kMaxLongTermRefPicSets];
bool sps_temporal_mvp_enabled_flag;
bool strong_intra_smoothing_enabled_flag;
H265VUIParameters vui_parameters;
// Calculated fields.
int chroma_array_type;
int sub_width_c;
int sub_height_c;
size_t max_dpb_size;
int bit_depth_y;
int bit_depth_c;
int max_pic_order_cnt_lsb;
int sps_max_latency_pictures[kMaxSubLayers];
int ctb_log2_size_y;
int pic_width_in_ctbs_y;
int pic_height_in_ctbs_y;
int pic_size_in_ctbs_y;
int wp_offset_half_range_y;
int wp_offset_half_range_c;
// Helpers to compute frequently-used values. They do not verify that the
// results are in-spec for the given profile or level.
gfx::Size GetCodedSize() const;
gfx::Rect GetVisibleRect() const;
VideoColorSpace GetColorSpace() const;
};
struct MEDIA_EXPORT H265PPS {
H265PPS();
enum {
kMaxNumTileColumnWidth = 19, // From VAAPI.
kMaxNumTileRowHeight = 21, // From VAAPI.
};
int temporal_id; // calculated from NALU
// Syntax elements.
int pps_pic_parameter_set_id;
int pps_seq_parameter_set_id;
bool dependent_slice_segments_enabled_flag;
bool output_flag_present_flag;
int num_extra_slice_header_bits;
bool sign_data_hiding_enabled_flag;
bool cabac_init_present_flag;
int num_ref_idx_l0_default_active_minus1;
int num_ref_idx_l1_default_active_minus1;
int init_qp_minus26;
bool constrained_intra_pred_flag;
bool transform_skip_enabled_flag;
bool cu_qp_delta_enabled_flag;
int diff_cu_qp_delta_depth;
int pps_cb_qp_offset;
int pps_cr_qp_offset;
bool pps_slice_chroma_qp_offsets_present_flag;
bool weighted_pred_flag;
bool weighted_bipred_flag;
bool transquant_bypass_enabled_flag;
bool tiles_enabled_flag;
bool entropy_coding_sync_enabled_flag;
int num_tile_columns_minus1;
int num_tile_rows_minus1;
bool uniform_spacing_flag;
int column_width_minus1[kMaxNumTileColumnWidth];
int row_height_minus1[kMaxNumTileRowHeight];
bool loop_filter_across_tiles_enabled_flag;
bool pps_loop_filter_across_slices_enabled_flag;
bool deblocking_filter_override_enabled_flag;
bool pps_deblocking_filter_disabled_flag;
int pps_beta_offset_div2;
int pps_tc_offset_div2;
bool pps_scaling_list_data_present_flag;
H265ScalingListData scaling_list_data;
bool lists_modification_present_flag;
int log2_parallel_merge_level_minus2;
bool slice_segment_header_extension_present_flag;
// Calculated fields.
int qp_bd_offset_y;
};
struct MEDIA_EXPORT H265RefPicListsModifications {
H265RefPicListsModifications();
// Syntax elements.
bool ref_pic_list_modification_flag_l0;
int list_entry_l0[kMaxRefIdxActive];
bool ref_pic_list_modification_flag_l1;
int list_entry_l1[kMaxRefIdxActive];
};
struct MEDIA_EXPORT H265PredWeightTable {
H265PredWeightTable();
// Syntax elements.
int luma_log2_weight_denom;
int delta_chroma_log2_weight_denom;
int chroma_log2_weight_denom;
int delta_luma_weight_l0[kMaxRefIdxActive];
int luma_offset_l0[kMaxRefIdxActive];
int delta_chroma_weight_l0[kMaxRefIdxActive][2];
int delta_chroma_offset_l0[kMaxRefIdxActive][2];
int delta_luma_weight_l1[kMaxRefIdxActive];
int luma_offset_l1[kMaxRefIdxActive];
int delta_chroma_weight_l1[kMaxRefIdxActive][2];
int delta_chroma_offset_l1[kMaxRefIdxActive][2];
};
struct MEDIA_EXPORT H265SliceHeader {
H265SliceHeader();
enum {
kSliceTypeB = 0, // Table 7-7
kSliceTypeP = 1, // Table 7-7
kSliceTypeI = 2, // Table 7-7
};
int nal_unit_type; // from NAL header
const uint8_t* nalu_data; // from NAL header
size_t nalu_size; // from NAL header
size_t header_size; // calculated, not including emulation prevention bytes
size_t header_emulation_prevention_bytes;
// Syntax elements.
bool first_slice_segment_in_pic_flag;
bool no_output_of_prior_pics_flag;
int slice_pic_parameter_set_id;
bool dependent_slice_segment_flag;
int slice_segment_address;
int slice_type;
bool pic_output_flag;
int colour_plane_id;
int slice_pic_order_cnt_lsb;
bool short_term_ref_pic_set_sps_flag;
H265StRefPicSet st_ref_pic_set;
int short_term_ref_pic_set_idx;
int num_long_term_sps;
int num_long_term_pics;
int poc_lsb_lt[kMaxLongTermRefPicSets];
bool used_by_curr_pic_lt[kMaxLongTermRefPicSets];
bool delta_poc_msb_present_flag[kMaxLongTermRefPicSets];
int delta_poc_msb_cycle_lt[kMaxLongTermRefPicSets];
bool slice_temporal_mvp_enabled_flag;
bool slice_sao_luma_flag;
bool slice_sao_chroma_flag;
bool num_ref_idx_active_override_flag;
int num_ref_idx_l0_active_minus1;
int num_ref_idx_l1_active_minus1;
H265RefPicListsModifications ref_pic_lists_modification;
bool mvd_l1_zero_flag;
bool cabac_init_flag;
bool collocated_from_l0_flag;
int collocated_ref_idx;
H265PredWeightTable pred_weight_table;
int five_minus_max_num_merge_cand;
int slice_qp_delta;
int slice_cb_qp_offset;
int slice_cr_qp_offset;
bool slice_deblocking_filter_disabled_flag;
int slice_beta_offset_div2;
int slice_tc_offset_div2;
bool slice_loop_filter_across_slices_enabled_flag;
// Calculated.
int curr_rps_idx;
int num_pic_total_curr;
bool irap_pic;
// Number of bits st_ref_pic_set takes after removing emulation prevention
// bytes.
int st_rps_bits;
bool IsISlice() const;
bool IsPSlice() const;
bool IsBSlice() const;
const H265StRefPicSet& GetStRefPicSet(const H265SPS* sps) const {
if (curr_rps_idx == sps->num_short_term_ref_pic_sets)
return st_ref_pic_set;
return sps->st_ref_pic_set[curr_rps_idx];
}
};
// Class to parse an Annex-B H.265 stream.
class MEDIA_EXPORT H265Parser {
public:
enum Result {
kOk,
kInvalidStream, // error in stream
kUnsupportedStream, // stream not supported by the parser
kMissingParameterSet, // missing PPS/SPS from what was parsed
kEOStream, // end of stream
};
H265Parser();
~H265Parser();
void Reset();
// Set current stream pointer to |stream| of |stream_size| in bytes,
// |stream| owned by caller.
// |subsamples| contains information about what parts of |stream| are
// encrypted.
void SetStream(const uint8_t* stream, off_t stream_size);
void SetEncryptedStream(const uint8_t* stream,
off_t stream_size,
const std::vector<SubsampleEntry>& subsamples);
// Read the stream to find the next NALU, identify it and return
// that information in |*nalu|. This advances the stream to the beginning
// of this NALU, but not past it, so subsequent calls to NALU-specific
// parsing functions (ParseSPS, etc.) will parse this NALU.
// If the caller wishes to skip the current NALU, it can call this function
// again, instead of any NALU-type specific parse functions below.
Result AdvanceToNextNALU(H265NALU* nalu);
// NALU-specific parsing functions.
// These should be called after AdvanceToNextNALU().
// SPSes and PPSes are owned by the parser class and the memory for their
// structures is managed here, not by the caller, as they are reused across
// NALUs.
//
// Parse an SPS/PPS NALU and save their data in the parser, returning id
// of the parsed structure in |*pps_id|/|*sps_id|. To get a pointer to a given
// SPS/PPS structure, use GetSPS()/GetPPS(), passing the returned
// |*sps_id|/|*pps_id| as parameter.
Result ParseSPS(int* sps_id);
Result ParsePPS(const H265NALU& nalu, int* pps_id);
// Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or null if not
// present.
const H265SPS* GetSPS(int sps_id) const;
const H265PPS* GetPPS(int pps_id) const;
// Slice headers and are not used across NALUs by the parser and can be
// discarded after current NALU, so the parser does not store them, nor does
// it manage their memory. The caller has to provide and manage it instead.
// Parse a slice header, returning it in |*shdr|. |*nalu| must be set to
// the NALU returned from AdvanceToNextNALU() and corresponding to |*shdr|.
// |prior_shdr| should be the last parsed header in decoding order for
// handling dependent slice segments. If |prior_shdr| is null and this is a
// dependent slice segment, an error will be returned.
Result ParseSliceHeader(const H265NALU& nalu,
H265SliceHeader* shdr,
H265SliceHeader* prior_shdr);
static VideoCodecProfile ProfileIDCToVideoCodecProfile(int profile_idc);
// The return value of this method changes for every successful call to
// AdvanceToNextNALU().
// This returns the subsample information for the last NALU that was output
// from AdvanceToNextNALU().
std::vector<SubsampleEntry> GetCurrentSubsamples();
private:
// Move the stream pointer to the beginning of the next NALU,
// i.e. pointing at the next start code.
// Return true if a NALU has been found.
// If a NALU is found:
// - its size in bytes is returned in |*nalu_size| and includes
// the start code as well as the trailing zero bits.
// - the size in bytes of the start code is returned in |*start_code_size|.
bool LocateNALU(off_t* nalu_size, off_t* start_code_size);
// Exp-Golomb code parsing as specified in chapter 9.2 of the spec.
// Read one unsigned exp-Golomb code from the stream and return in |*val|.
Result ReadUE(int* val);
// Read one signed exp-Golomb code from the stream and return in |*val|.
Result ReadSE(int* val);
Result ParseProfileTierLevel(bool profile_present,
int max_num_sub_layers_minus1,
H265ProfileTierLevel* profile_tier_level);
Result ParseScalingListData(H265ScalingListData* scaling_list_data);
Result ParseStRefPicSet(int st_rps_idx,
const H265SPS& sps,
H265StRefPicSet* st_ref_pic_set);
Result ParseVuiParameters(const H265SPS& sps, H265VUIParameters* vui);
Result ParseAndIgnoreHrdParameters(bool common_inf_present_flag,
int max_num_sub_layers_minus1);
Result ParseAndIgnoreSubLayerHrdParameters(
int cpb_cnt,
bool sub_pic_hrd_params_present_flag);
Result ParseRefPicListsModifications(const H265SliceHeader& shdr,
H265RefPicListsModifications* rpl_mod);
Result ParsePredWeightTable(const H265SPS& sps,
const H265SliceHeader& shdr,
H265PredWeightTable* pred_weight_table);
// Pointer to the current NALU in the stream.
const uint8_t* stream_;
// Bytes left in the stream after the current NALU.
off_t bytes_left_;
H264BitReader br_;
// PPSes and SPSes stored for future reference.
base::flat_map<int, std::unique_ptr<H265SPS>> active_sps_;
base::flat_map<int, std::unique_ptr<H265PPS>> active_pps_;
// Ranges of encrypted bytes in the buffer passed to SetEncryptedStream().
Ranges<const uint8_t*> encrypted_ranges_;
// This contains the range of the previous NALU found in
// AdvanceToNextNalu(). Holds exactly one range.
Ranges<const uint8_t*> previous_nalu_range_;
DISALLOW_COPY_AND_ASSIGN(H265Parser);
};
} // namespace media
#endif // MEDIA_VIDEO_H265_PARSER_H_