From 6c5fb0208451b8fbfc15087465ea8c3dc9421c92 Mon Sep 17 00:00:00 2001 From: Qiu Jianlin Date: Mon, 22 Oct 2018 10:36:54 +0800 Subject: [PATCH] Rebase all changes for HEVC to M70 --- ...builtin_video_bitrate_allocator_factory.cc | 1 + api/video/video_codec_type.h | 3 + api/video_codecs/video_codec.cc | 36 + api/video_codecs/video_codec.h | 24 + api/video_codecs/video_encoder.cc | 17 + api/video_codecs/video_encoder.h | 4 +- api/video_codecs/video_encoder_config.cc | 11 + api/video_codecs/video_encoder_config.h | 14 +- common_video/BUILD.gn | 13 + common_video/h265/h265_common.cc | 110 +++ common_video/h265/h265_common.h | 100 +++ common_video/h265/h265_pps_parser.cc | 209 ++++++ common_video/h265/h265_pps_parser.h | 64 ++ common_video/h265/h265_sps_parser.cc | 192 ++++++ common_video/h265/h265_sps_parser.h | 54 ++ common_video/h265/h265_vps_parser.cc | 62 ++ common_video/h265/h265_vps_parser.h | 43 ++ media/base/media_constants.cc | 12 +- media/base/media_constants.h | 11 +- modules/rtp_rtcp/BUILD.gn | 11 + modules/rtp_rtcp/source/rtp_format.cc | 19 + modules/rtp_rtcp/source/rtp_format_h265.cc | 645 ++++++++++++++++++ modules/rtp_rtcp/source/rtp_format_h265.h | 129 ++++ modules/rtp_rtcp/source/rtp_sender_video.cc | 12 +- modules/rtp_rtcp/source/rtp_video_header.h | 11 + modules/video_coding/BUILD.gn | 7 + .../codecs/h265/include/h265_globals.h | 62 ++ .../multiplex/multiplex_encoder_adapter.cc | 6 + modules/video_coding/encoded_frame.cc | 6 + .../video_coding/h265_vps_sps_pps_tracker.cc | 321 +++++++++ .../video_coding/h265_vps_sps_pps_tracker.h | 65 ++ modules/video_coding/jitter_buffer_common.h | 3 + modules/video_coding/packet_buffer.cc | 35 +- modules/video_coding/session_info.cc | 69 +- modules/video_coding/session_info.h | 4 +- test/video_codec_settings.h | 8 +- video/rtp_video_stream_receiver.cc | 17 +- video/rtp_video_stream_receiver.h | 8 + video/send_statistics_proxy.cc | 6 + video/video_receive_stream.cc | 6 +- video/video_stream_encoder.cc | 8 +- 41 
files changed, 2421 insertions(+), 17 deletions(-) create mode 100644 common_video/h265/h265_common.cc create mode 100644 common_video/h265/h265_common.h create mode 100644 common_video/h265/h265_pps_parser.cc create mode 100644 common_video/h265/h265_pps_parser.h create mode 100644 common_video/h265/h265_sps_parser.cc create mode 100644 common_video/h265/h265_sps_parser.h create mode 100644 common_video/h265/h265_vps_parser.cc create mode 100644 common_video/h265/h265_vps_parser.h create mode 100644 modules/rtp_rtcp/source/rtp_format_h265.cc create mode 100644 modules/rtp_rtcp/source/rtp_format_h265.h create mode 100644 modules/video_coding/codecs/h265/include/h265_globals.h create mode 100644 modules/video_coding/h265_vps_sps_pps_tracker.cc create mode 100644 modules/video_coding/h265_vps_sps_pps_tracker.h diff --git a/api/video/builtin_video_bitrate_allocator_factory.cc b/api/video/builtin_video_bitrate_allocator_factory.cc index 89913673396..4f3f6683278 100644 --- a/api/video/builtin_video_bitrate_allocator_factory.cc +++ b/api/video/builtin_video_bitrate_allocator_factory.cc @@ -40,6 +40,7 @@ class BuiltinVideoBitrateAllocatorFactory case kVideoCodecVP9: rate_allocator.reset(new SvcRateAllocator(codec)); break; + // TODO: add an allocator here for H.265 default: rate_allocator.reset(new DefaultVideoBitrateAllocator(codec)); } diff --git a/api/video/video_codec_type.h b/api/video/video_codec_type.h index 2e406c07554..62aabf4dfbc 100644 --- a/api/video/video_codec_type.h +++ b/api/video/video_codec_type.h @@ -21,6 +21,9 @@ enum VideoCodecType { kVideoCodecVP8, kVideoCodecVP9, kVideoCodecH264, +#ifndef DISABLE_H265 + kVideoCodecH265, +#endif kVideoCodecMultiplex, }; diff --git a/api/video_codecs/video_codec.cc b/api/video_codecs/video_codec.cc index 3566aefec63..bf3f7c4b563 100644 --- a/api/video_codecs/video_codec.cc +++ b/api/video_codecs/video_codec.cc @@ -45,6 +45,17 @@ bool VideoCodecH264::operator==(const VideoCodecH264& other) const { 
numberOfTemporalLayers == other.numberOfTemporalLayers); } +#ifndef DISABLE_H265 +bool VideoCodecH265::operator==(const VideoCodecH265& other) const { + return (frameDroppingOn == other.frameDroppingOn && + keyFrameInterval == other.keyFrameInterval && + vpsLen == other.vpsLen && spsLen == other.spsLen && + ppsLen == other.ppsLen && + (spsLen == 0 || memcmp(spsData, other.spsData, spsLen) == 0) && + (ppsLen == 0 || memcmp(ppsData, other.ppsData, ppsLen) == 0)); +} +#endif + bool SpatialLayer::operator==(const SpatialLayer& other) const { return (width == other.width && height == other.height && numberOfTemporalLayers == other.numberOfTemporalLayers && @@ -103,9 +114,25 @@ const VideoCodecH264& VideoCodec::H264() const { return codec_specific_.H264; } +#ifndef DISABLE_H265 +VideoCodecH265* VideoCodec::H265() { + RTC_DCHECK_EQ(codecType, kVideoCodecH265); + return &codec_specific_.H265; +} + +const VideoCodecH265& VideoCodec::H265() const { + RTC_DCHECK_EQ(codecType, kVideoCodecH265); + return codec_specific_.H265; +} +#endif + static const char* kPayloadNameVp8 = "VP8"; static const char* kPayloadNameVp9 = "VP9"; static const char* kPayloadNameH264 = "H264"; +#ifndef DISABLE_H265 +static const char* kPayloadNameH265 = "H265"; +#endif +static const char* kPayloadNameI420 = "I420"; static const char* kPayloadNameGeneric = "Generic"; static const char* kPayloadNameMultiplex = "Multiplex"; @@ -117,6 +144,10 @@ const char* CodecTypeToPayloadString(VideoCodecType type) { return kPayloadNameVp9; case kVideoCodecH264: return kPayloadNameH264; +#ifndef DISABLE_H265 + case kVideoCodecH265: + return kPayloadNameH265; +#endif // Other codecs default to generic. 
default: return kPayloadNameGeneric; @@ -132,6 +163,10 @@ VideoCodecType PayloadStringToCodecType(const std::string& name) { return kVideoCodecH264; if (absl::EqualsIgnoreCase(name, kPayloadNameMultiplex)) return kVideoCodecMultiplex; +#ifndef DISABLE_H265 + if (absl::EqualsIgnoreCase(name, kPayloadNameH265)) + return kVideoCodecH265; +#endif return kVideoCodecGeneric; } diff --git a/api/video_codecs/video_codec.h b/api/video_codecs/video_codec.h index d014e7c3a94..95290290fa8 100644 --- a/api/video_codecs/video_codec.h +++ b/api/video_codecs/video_codec.h @@ -82,6 +82,23 @@ struct VideoCodecH264 { uint8_t numberOfTemporalLayers; }; +#ifndef DISABLE_H265 +struct VideoCodecH265 { + bool operator==(const VideoCodecH265& other) const; + bool operator!=(const VideoCodecH265& other) const { + return !(*this == other); + } + bool frameDroppingOn; + int keyFrameInterval; + const uint8_t* vpsData; + size_t vpsLen; + const uint8_t* spsData; + size_t spsLen; + const uint8_t* ppsData; + size_t ppsLen; +}; +#endif + // Translates from name of codec to codec type and vice versa. RTC_EXPORT const char* CodecTypeToPayloadString(VideoCodecType type); RTC_EXPORT VideoCodecType PayloadStringToCodecType(const std::string& name); @@ -90,6 +107,9 @@ union VideoCodecUnion { VideoCodecVP8 VP8; VideoCodecVP9 VP9; VideoCodecH264 H264; +#ifndef DISABLE_H265 + VideoCodecH265 H265; +#endif }; enum class VideoCodecMode { kRealtimeVideo, kScreensharing }; @@ -151,6 +171,10 @@ class RTC_EXPORT VideoCodec { const VideoCodecVP9& VP9() const; VideoCodecH264* H264(); const VideoCodecH264& H264() const; +#ifndef DISABLE_H265 + VideoCodecH265* H265(); + const VideoCodecH265& H265() const; +#endif private: // TODO(hta): Consider replacing the union with a pointer type. 
diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc index 710d90d2d67..fdee9fb72aa 100644 --- a/api/video_codecs/video_encoder.cc +++ b/api/video_codecs/video_encoder.cc @@ -58,6 +58,23 @@ VideoCodecH264 VideoEncoder::GetDefaultH264Settings() { return h264_settings; } +#ifndef DISABLE_H265 +VideoCodecH265 VideoEncoder::GetDefaultH265Settings() { + VideoCodecH265 h265_settings; + memset(&h265_settings, 0, sizeof(h265_settings)); + + // h265_settings.profile = kProfileBase; + h265_settings.frameDroppingOn = true; + h265_settings.keyFrameInterval = 3000; + h265_settings.spsData = nullptr; + h265_settings.spsLen = 0; + h265_settings.ppsData = nullptr; + h265_settings.ppsLen = 0; + + return h265_settings; +} +#endif + VideoEncoder::ScalingSettings::ScalingSettings() = default; VideoEncoder::ScalingSettings::ScalingSettings(KOff) : ScalingSettings() {} diff --git a/api/video_codecs/video_encoder.h b/api/video_codecs/video_encoder.h index c01309f3e60..542a3e19f3d 100644 --- a/api/video_codecs/video_encoder.h +++ b/api/video_codecs/video_encoder.h @@ -240,7 +240,9 @@ class RTC_EXPORT VideoEncoder { static VideoCodecVP8 GetDefaultVp8Settings(); static VideoCodecVP9 GetDefaultVp9Settings(); static VideoCodecH264 GetDefaultH264Settings(); - +#ifndef DISABLE_H265 + static VideoCodecH265 GetDefaultH265Settings(); +#endif virtual ~VideoEncoder() {} // Initialize the encoder with the information from the codecSettings diff --git a/api/video_codecs/video_encoder_config.cc b/api/video_codecs/video_encoder_config.cc index 2b1adc021b8..a775018c126 100644 --- a/api/video_codecs/video_encoder_config.cc +++ b/api/video_codecs/video_encoder_config.cc @@ -92,6 +92,10 @@ void VideoEncoderConfig::EncoderSpecificSettings::FillEncoderSpecificSettings( FillVideoCodecVp8(codec->VP8()); } else if (codec->codecType == kVideoCodecVP9) { FillVideoCodecVp9(codec->VP9()); +#ifndef DISABLE_H265 + } else if (codec->codecType == kVideoCodecH265) { + 
FillVideoCodecH265(codec->H265()); +#endif } else { RTC_NOTREACHED() << "Encoder specifics set/used for unknown codec type."; } @@ -102,6 +106,13 @@ void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecH264( RTC_NOTREACHED(); } +#ifndef DISABLE_H265 +void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecH265( + VideoCodecH265* h265_settings) const { + RTC_NOTREACHED(); +} +#endif + void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecVp8( VideoCodecVP8* vp8_settings) const { RTC_NOTREACHED(); diff --git a/api/video_codecs/video_encoder_config.h b/api/video_codecs/video_encoder_config.h index 0c69b932885..27912149d4d 100644 --- a/api/video_codecs/video_encoder_config.h +++ b/api/video_codecs/video_encoder_config.h @@ -64,7 +64,9 @@ class VideoEncoderConfig { virtual void FillVideoCodecVp8(VideoCodecVP8* vp8_settings) const; virtual void FillVideoCodecVp9(VideoCodecVP9* vp9_settings) const; virtual void FillVideoCodecH264(VideoCodecH264* h264_settings) const; - +#ifndef DISABLE_H265 + virtual void FillVideoCodecH265(VideoCodecH265* h265_settings) const; +#endif private: ~EncoderSpecificSettings() override {} friend class VideoEncoderConfig; @@ -79,6 +81,16 @@ class VideoEncoderConfig { VideoCodecH264 specifics_; }; +#ifndef DISABLE_H265 + class H265EncoderSpecificSettings : public EncoderSpecificSettings { + public: + explicit H265EncoderSpecificSettings(const VideoCodecH265& specifics); + void FillVideoCodecH265(VideoCodecH265* h265_settings) const override; + + private: + VideoCodecH265 specifics_; + }; +#endif class Vp8EncoderSpecificSettings : public EncoderSpecificSettings { public: explicit Vp8EncoderSpecificSettings(const VideoCodecVP8& specifics); diff --git a/common_video/BUILD.gn b/common_video/BUILD.gn index 10a646fa87b..d7ca775ac0e 100644 --- a/common_video/BUILD.gn +++ b/common_video/BUILD.gn @@ -39,6 +39,19 @@ rtc_static_library("common_video") { "video_render_frames.h", ] + if (rtc_use_h265) { + sources += [ + 
"h265/h265_common.cc", + "h265/h265_common.h", + "h265/h265_pps_parser.cc", + "h265/h265_pps_parser.h", + "h265/h265_sps_parser.cc", + "h265/h265_sps_parser.h", + "h265/h265_vps_parser.cc", + "h265/h265_vps_parser.h", + ] + } + deps = [ "../api:scoped_refptr", "../api/task_queue", diff --git a/common_video/h265/h265_common.cc b/common_video/h265/h265_common.cc new file mode 100644 index 00000000000..2b2cac22759 --- /dev/null +++ b/common_video/h265/h265_common.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_common.h" + +namespace webrtc { +namespace H265 { + +const uint8_t kNaluTypeMask = 0x7E; + +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + // This is sorta like Boyer-Moore, but with only the first optimization step: + // given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0, + // skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so + // this will skip the majority of reads/checks. + std::vector sequences; + if (buffer_size < kNaluShortStartSequenceSize) + return sequences; + + const size_t end = buffer_size - kNaluShortStartSequenceSize; + for (size_t i = 0; i < end;) { + if (buffer[i + 2] > 1) { + i += 3; + } else if (buffer[i + 2] == 1 && buffer[i + 1] == 0 && buffer[i] == 0) { + // We found a start sequence, now check if it was a 3 of 4 byte one. + NaluIndex index = {i, i + 3, 0}; + if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0) + --index.start_offset; + + // Update length of previous entry. 
+ auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = index.start_offset - it->payload_start_offset; + + sequences.push_back(index); + + i += 3; + } else { + ++i; + } + } + + // Update length of last entry, if any. + auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = buffer_size - it->payload_start_offset; + + return sequences; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast((data & kNaluTypeMask) >> 1); +} + +std::vector ParseRbsp(const uint8_t* data, size_t length) { + std::vector out; + out.reserve(length); + + for (size_t i = 0; i < length;) { + // Be careful about over/underflow here. byte_length_ - 3 can underflow, and + // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ + // above, and that expression will produce the number of bytes left in + // the stream including the byte at i. + if (length - i >= 3 && !data[i] && !data[i + 1] && data[i + 2] == 3) { + // Two rbsp bytes. + out.push_back(data[i++]); + out.push_back(data[i++]); + // Skip the emulation byte. + i++; + } else { + // Single rbsp byte. + out.push_back(data[i++]); + } + } + return out; +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + static const uint8_t kZerosInStartSequence = 2; + static const uint8_t kEmulationByte = 0x03u; + size_t num_consecutive_zeros = 0; + destination->EnsureCapacity(destination->size() + length); + + for (size_t i = 0; i < length; ++i) { + uint8_t byte = bytes[i]; + if (byte <= kEmulationByte && + num_consecutive_zeros >= kZerosInStartSequence) { + // Need to escape. 
+ destination->AppendData(kEmulationByte); + num_consecutive_zeros = 0; + } + destination->AppendData(byte); + if (byte == 0) { + ++num_consecutive_zeros; + } else { + num_consecutive_zeros = 0; + } + } +} + +} // namespace H265 +} // namespace webrtc diff --git a/common_video/h265/h265_common.h b/common_video/h265/h265_common.h new file mode 100644 index 00000000000..97acb42bb61 --- /dev/null +++ b/common_video/h265/h265_common.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_COMMON_H_ +#define COMMON_VIDEO_H265_H265_COMMON_H_ + +#include +#include + +#include "rtc_base/buffer.h" + +namespace webrtc { + +namespace H265 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +const size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. +const size_t kNaluShortStartSequenceSize = 3; + +// The size of the NALU type byte (1). 
+const size_t kNaluTypeSize = 1; + +enum NaluType : uint8_t { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdr = 20, + kCra = 21, + kRsvIrapVcl23 = 23, + kVps = 32, + kSps = 33, + kPps = 34, + kAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kAP = 48, + kFU = 49 +}; + +enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2, kSp = 3, kSi = 4 }; + +struct NaluIndex { + // Start index of NALU, including start sequence. + size_t start_offset; + // Start index of NALU payload, typically type header. + size_t payload_start_offset; + // Length of NALU payload, in bytes, counting from payload_start_offset. + size_t payload_size; +}; + +// Returns a vector of the NALU indices in the given buffer. +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size); + +// Get the NAL type from the header byte immediately following start sequence. +NaluType ParseNaluType(uint8_t data); + +// Methods for parsing and writing RBSP. See section 7.4.2 of the H265 spec. +// +// The following sequences are illegal, and need to be escaped when encoding: +// 00 00 00 -> 00 00 03 00 +// 00 00 01 -> 00 00 03 01 +// 00 00 02 -> 00 00 03 02 +// And things in the source that look like the emulation byte pattern (00 00 03) +// need to have an extra emulation byte added, so it's removed when decoding: +// 00 00 03 -> 00 00 03 03 +// +// Decoding is simply a matter of finding any 00 00 03 sequence and removing +// the 03 emulation byte. + +// Parse the given data and remove any emulation byte escaping. +std::vector ParseRbsp(const uint8_t* data, size_t length); + +// Write the given data to the destination buffer, inserting emulation +// bytes in order to escape any data that could be interpreted as a start +// sequence. 
+void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination); +} // namespace H265 +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_COMMON_H_ diff --git a/common_video/h265/h265_pps_parser.cc b/common_video/h265/h265_pps_parser.cc new file mode 100644 index 00000000000..418411ced32 --- /dev/null +++ b/common_video/h265/h265_pps_parser.cc @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_pps_parser.h" + +#include +#include + +#include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return absl::nullopt; \ + } + +namespace { +const int kMaxPicInitQpDeltaValue = 25; +const int kMinPicInitQpDeltaValue = -26; +} // namespace + +namespace webrtc { + +// General note: this is based off the 02/2018 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +absl::optional H265PpsParser::ParsePps( + const uint8_t* data, + size_t length) { + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1 of the H.264 standard. 
+ std::vector unpacked_buffer = H265::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); + return ParseInternal(&bit_buffer); +} + +bool H265PpsParser::ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id) { + RTC_DCHECK(pps_id); + RTC_DCHECK(sps_id); + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1 of the H.265 standard. + std::vector unpacked_buffer = H265::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); + return ParsePpsIdsInternal(&bit_buffer, pps_id, sps_id); +} + +absl::optional H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type) { + rtc::BitBuffer slice_reader(data, length); + + // first_slice_segment_in_pic_flag: u(1) + uint32_t first_slice_segment_in_pic_flag = 0; + RETURN_EMPTY_ON_FAIL( + slice_reader.ReadBits(&first_slice_segment_in_pic_flag, 1)); + + if (nalu_type >= H265::NaluType::kBlaWLp && + nalu_type <= H265::NaluType::kRsvIrapVcl23) { + // no_output_of_prior_pics_flag: u(1) + RETURN_EMPTY_ON_FAIL(slice_reader.ConsumeBits(1)); + } + + // slice_pic_parameter_set_id: ue(v) + uint32_t slice_pic_parameter_set_id = 0; + if (!slice_reader.ReadExponentialGolomb(&slice_pic_parameter_set_id)) + return absl::nullopt; + + return slice_pic_parameter_set_id; +} + +absl::optional H265PpsParser::ParseInternal( + rtc::BitBuffer* bit_buffer) { + PpsState pps; + + RETURN_EMPTY_ON_FAIL(ParsePpsIdsInternal(bit_buffer, &pps.id, &pps.sps_id)); + + uint32_t bits_tmp; + uint32_t golomb_ignored; + // entropy_coding_mode_flag: u(1) + uint32_t entropy_coding_mode_flag; + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1)); + pps.entropy_coding_mode_flag = entropy_coding_mode_flag != 0; + // 
bottom_field_pic_order_in_frame_present_flag: u(1) + uint32_t bottom_field_pic_order_in_frame_present_flag; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1)); + pps.bottom_field_pic_order_in_frame_present_flag = + bottom_field_pic_order_in_frame_present_flag != 0; + + // num_slice_groups_minus1: ue(v) + uint32_t num_slice_groups_minus1; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&num_slice_groups_minus1)); + if (num_slice_groups_minus1 > 0) { + uint32_t slice_group_map_type; + // slice_group_map_type: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&slice_group_map_type)); + if (slice_group_map_type == 0) { + for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; + ++i_group) { + // run_length_minus1[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } + } else if (slice_group_map_type == 1) { + // TODO(sprang): Implement support for dispersed slice group map type. + // See 8.2.2.2 Specification for dispersed slice group map type. 
+ } else if (slice_group_map_type == 2) { + for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; + ++i_group) { + // top_left[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // bottom_right[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } + } else if (slice_group_map_type == 3 || slice_group_map_type == 4 || + slice_group_map_type == 5) { + // slice_group_change_direction_flag: u(1) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1)); + // slice_group_change_rate_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } else if (slice_group_map_type == 6) { + // pic_size_in_map_units_minus1: ue(v) + uint32_t pic_size_in_map_units_minus1; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&pic_size_in_map_units_minus1)); + uint32_t slice_group_id_bits = 0; + uint32_t num_slice_groups = num_slice_groups_minus1 + 1; + // If num_slice_groups is not a power of two an additional bit is required + // to account for the ceil() of log2() below. + if ((num_slice_groups & (num_slice_groups - 1)) != 0) + ++slice_group_id_bits; + while (num_slice_groups > 0) { + num_slice_groups >>= 1; + ++slice_group_id_bits; + } + for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) { + // slice_group_id[i]: u(v) + // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits. 
+ RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&bits_tmp, slice_group_id_bits)); + } + } + } + // num_ref_idx_l0_default_active_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // num_ref_idx_l1_default_active_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // weighted_pred_flag: u(1) + uint32_t weighted_pred_flag; + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&weighted_pred_flag, 1)); + pps.weighted_pred_flag = weighted_pred_flag != 0; + // weighted_bipred_idc: u(2) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_idc, 2)); + + // pic_init_qp_minus26: se(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26)); + // Sanity-check parsed value + if (pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue || + pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) { + RETURN_EMPTY_ON_FAIL(false); + } + // pic_init_qs_minus26: se(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // chroma_qp_index_offset: se(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // deblocking_filter_control_present_flag: u(1) + // constrained_intra_pred_flag: u(1) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 2)); + // redundant_pic_cnt_present_flag: u(1) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&pps.redundant_pic_cnt_present_flag, 1)); + + return pps; +} + +bool H265PpsParser::ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer, + uint32_t* pps_id, + uint32_t* sps_id) { + // pic_parameter_set_id: ue(v) + if (!bit_buffer->ReadExponentialGolomb(pps_id)) + return false; + // seq_parameter_set_id: ue(v) + if (!bit_buffer->ReadExponentialGolomb(sps_id)) + return false; + return true; +} + +} // namespace webrtc diff --git a/common_video/h265/h265_pps_parser.h b/common_video/h265/h265_pps_parser.h new file mode 100644 index 00000000000..cfa471883dc --- /dev/null +++ 
b/common_video/h265/h265_pps_parser.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_PPS_PARSER_H_ +#define COMMON_VIDEO_H265_PPS_PARSER_H_ + +#include "absl/types/optional.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H265 NALU. +class H265PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool bottom_field_pic_order_in_frame_present_flag = false; + bool weighted_pred_flag = false; + bool entropy_coding_mode_flag = false; + uint32_t weighted_bipred_idc = 0; + uint32_t redundant_pic_cnt_present_flag = 0; + int pic_init_qp_minus26 = 0; + uint32_t id = 0; + uint32_t sps_id = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static absl::optional ParsePps(const uint8_t* data, size_t length); + + static bool ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id); + + static absl::optional ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type); + + protected: + // Parse the PPS state, for a bit buffer where RBSP decoding has already been + // performed. 
+ static absl::optional ParseInternal(rtc::BitBuffer* bit_buffer); + static bool ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer, + uint32_t* pps_id, + uint32_t* sps_id); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_PPS_PARSER_H_ diff --git a/common_video/h265/h265_sps_parser.cc b/common_video/h265/h265_sps_parser.cc new file mode 100644 index 00000000000..0433f4b0ee9 --- /dev/null +++ b/common_video/h265/h265_sps_parser.cc @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_sps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +namespace { +typedef absl::optional OptionalSps; + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return OptionalSps(); \ + } +} // namespace + +namespace webrtc { + +H265SpsParser::SpsState::SpsState() = default; + +// General note: this is based off the 02/2018 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional H265SpsParser::ParseSps( + const uint8_t* data, + size_t length) { + std::vector unpacked_buffer = H265::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); + return ParseSpsUpToVui(&bit_buffer); +} + +absl::optional H265SpsParser::ParseSpsUpToVui( + rtc::BitBuffer* buffer) { + // Now, we need to use a bit buffer to parse through the actual HEVC SPS + // format. 
See Section 7.3.2.2.1 ("General sequence parameter set data + // syntax") of the H.265 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in luma samples. + // conf_win_*_offset -> crop information + + SpsState sps; + + // The golomb values we have to read, not just consume. + uint32_t golomb_ignored; + + // separate_colour_plane_flag is optional (assumed 0), but has implications + // about the ChromaArrayType, which modifies how we treat crop coordinates. + uint32_t separate_colour_plane_flag = 0; + + // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is + // 0. It defaults to 1, when not specified. + uint32_t chroma_format_idc = 1; + + // sps_video_parameter_set_id: u(4) + uint32_t sps_video_parameter_set_id = 0; + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_video_parameter_set_id, 4)); + // sps_max_sub_layers_minus1: u(3) + uint32_t sps_max_sub_layers_minus1 = 0; + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_max_sub_layers_minus1, 3)); + // sps_temporal_id_nesting_flag: u(1) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); + // profile_tier_level(1, sps_max_sub_layers_minus1). We are actually not + // using them, so read/skip over it. + // general_profile_space+general_tier_flag+general_profile_idc: u(8) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); + // general_profile_compatibility_flag[32] + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4)); + // general_progressive_source_flag + interlaced_source_flag+ + // non-packed_constraint flag + frame_only_constraint_flag: u(4) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4)); + // general_profile_idc decided flags or reserved. 
u(43) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43)); + // general_inbld_flag or reserved 0: u(1) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); + // general_level_idc: u(8) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); + // if max_sub_layers_minus1 >=1, read the sublayer profile information + std::vector sub_layer_profile_present_flags; + std::vector sub_layer_level_present_flags; + uint32_t sub_layer_profile_present = 0; + uint32_t sub_layer_level_present = 0; + for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) { + // sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_profile_present, 1)); + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_level_present, 1)); + sub_layer_profile_present_flags.push_back(sub_layer_profile_present); + sub_layer_level_present_flags.push_back(sub_layer_level_present); + } + if (sps_max_sub_layers_minus1 > 0) { + for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) { + // reserved 2 bits: u(2) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(2)); + } + } + for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) { + if (sub_layer_profile_present_flags[k]) { // + // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); + // profile_compatability_flag: u(32) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4)); + // sub_layer progressive_source_flag/interlaced_source_flag/ + // non_packed_constraint_flag/frame_only_constraint_flag: u(4) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4)); + // following 43-bits are profile_idc specific. We simply read/skip it. + // u(43) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43)); + // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); + } + if (sub_layer_level_present_flags[k]) { + // sub_layer_level_idc: u(8) + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); + } + } + // sps_seq_parameter_set_id: ue(v) + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); + // chrome_format_idc: ue(v) + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc)); + if (chroma_format_idc == 3) { + // seperate_colour_plane_flag: u(1) + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&separate_colour_plane_flag, 1)); + } + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + // pic_width_in_luma_samples: ue(v) + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&pic_width_in_luma_samples)); + // pic_height_in_luma_samples: ue(v) + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&pic_height_in_luma_samples)); + // conformance_window_flag: u(1) + uint32_t conformance_window_flag = 0; + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&conformance_window_flag, 1)); + + uint32_t conf_win_left_offset = 0; + uint32_t conf_win_right_offset = 0; + uint32_t conf_win_top_offset = 0; + uint32_t conf_win_bottom_offset = 0; + if (conformance_window_flag) { + // conf_win_left_offset: ue(v) + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_left_offset)); + // conf_win_right_offset: ue(v) + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_right_offset)); + // conf_win_top_offset: ue(v) + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_top_offset)); + // conf_win_bottom_offset: ue(v) + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&conf_win_bottom_offset)); + } + + // Far enough! We don't use the rest of the SPS. + + sps.vps_id = sps_video_parameter_set_id; + + // Start with the resolution determined by the pic_width/pic_height fields. 
+ sps.width = pic_width_in_luma_samples; + sps.height = pic_height_in_luma_samples; + + if (conformance_window_flag) { + int sub_width_c = ((1 == chroma_format_idc) || (2 == chroma_format_idc)) && + (0 == separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == chroma_format_idc) && (0 == separate_colour_plane_flag) ? 2 : 1; + // the offset includes the pixel within conformance window. so don't need to + // +1 as per spec + sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset); + sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset); + } + + return OptionalSps(sps); +} + +} // namespace webrtc diff --git a/common_video/h265/h265_sps_parser.h b/common_video/h265/h265_sps_parser.h new file mode 100644 index 00000000000..a0f86b69832 --- /dev/null +++ b/common_video/h265/h265_sps_parser.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_ + +#include "absl/types/optional.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. +class H265SpsParser { + public: + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. 
+ struct SpsState { + SpsState(); + + uint32_t width = 0; + uint32_t height = 0; + uint32_t delta_pic_order_always_zero_flag = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t frame_mbs_only_flag = 0; + uint32_t log2_max_frame_num_minus4 = 0; + uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; + uint32_t pic_order_cnt_type = 0; + uint32_t max_num_ref_frames = 0; + uint32_t vui_params_present = 0; + uint32_t id = 0; + uint32_t vps_id = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. + static absl::optional ParseSps(const uint8_t* data, size_t length); + + protected: + // Parse the SPS state, up till the VUI part, for a bit buffer where RBSP + // decoding has already been performed. + static absl::optional ParseSpsUpToVui(rtc::BitBuffer* buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_ diff --git a/common_video/h265/h265_vps_parser.cc b/common_video/h265/h265_vps_parser.cc new file mode 100644 index 00000000000..2391ae51df0 --- /dev/null +++ b/common_video/h265/h265_vps_parser.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_vps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +namespace { +typedef absl::optional OptionalVps; + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return OptionalVps(); \ + } +} // namespace + +namespace webrtc { + +H265VpsParser::VpsState::VpsState() = default; + +// General note: this is based off the 02/2018 version of the H.265 standard. 
+// You can find it on this page:
+// http://www.itu.int/rec/T-REC-H.265
+
+// Unpack RBSP and parse VPS state from the supplied buffer.
+absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseVps(
+    const uint8_t* data,
+    size_t length) {
+  std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
+  rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
+  return ParseInternal(&bit_buffer);
+}
+
+absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseInternal(
+    rtc::BitBuffer* buffer) {
+  // Now, we need to use a bit buffer to parse through the actual HEVC VPS
+  // format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the
+  // H.265 standard for a complete description.
+
+  VpsState vps;
+
+  // vps_video_parameter_set_id: u(4)
+  uint32_t vps_video_parameter_set_id = 0;
+  RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&vps_video_parameter_set_id, 4));
+
+  // Report the id exactly as it was read from the bitstream.
+  vps.id = vps_video_parameter_set_id;
+  return OptionalVps(vps);
+}
+
+}  // namespace webrtc
diff --git a/common_video/h265/h265_vps_parser.h b/common_video/h265/h265_vps_parser.h
new file mode 100644
index 00000000000..e8a2775f43d
--- /dev/null
+++ b/common_video/h265/h265_vps_parser.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_
+#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_
+
+#include "absl/types/optional.h"
+
+namespace rtc {
+class BitBuffer;
+}
+
+namespace webrtc {
+
+// A class for parsing out video parameter set (VPS) data from an H265 NALU.
+class H265VpsParser {
+ public:
+  // The parsed state of the VPS. Only some select values are stored.
+ // Add more as they are actually needed. + struct VpsState { + VpsState(); + + uint32_t id = 0; + }; + + // Unpack RBSP and parse VPS state from the supplied buffer. + static absl::optional ParseVps(const uint8_t* data, size_t length); + + protected: + // Parse the VPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional ParseInternal(rtc::BitBuffer* bit_buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_ diff --git a/media/base/media_constants.cc b/media/base/media_constants.cc index 3b4d2f27f86..6ca9e15bf47 100644 --- a/media/base/media_constants.cc +++ b/media/base/media_constants.cc @@ -104,14 +104,22 @@ const char kComfortNoiseCodecName[] = "CN"; const char kVp8CodecName[] = "VP8"; const char kVp9CodecName[] = "VP9"; const char kH264CodecName[] = "H264"; - +#ifndef DISABLE_H265 +const char kH265CodecName[] = "H265"; +#endif // RFC 6184 RTP Payload Format for H.264 video const char kH264FmtpProfileLevelId[] = "profile-level-id"; const char kH264FmtpLevelAsymmetryAllowed[] = "level-asymmetry-allowed"; const char kH264FmtpPacketizationMode[] = "packetization-mode"; const char kH264FmtpSpropParameterSets[] = "sprop-parameter-sets"; const char kH264ProfileLevelConstrainedBaseline[] = "42e01f"; - +#ifndef DISABLE_H265 +// RFC 7798 RTP Payload Format for H.265 video +const char kH265FmtpProfileSpace[] = "profile-space"; +const char kH265FmtpProfileId[] = "profile-id"; +const char kH265FmtpTierFlag[] = "tier-flag"; +const char kH265FmtpLevelId[] = "level-id"; +#endif const int kDefaultVideoMaxFramerate = 60; const size_t kConferenceMaxNumSpatialLayers = 3; diff --git a/media/base/media_constants.h b/media/base/media_constants.h index a796474dc3d..b2d19c9f159 100644 --- a/media/base/media_constants.h +++ b/media/base/media_constants.h @@ -133,7 +133,9 @@ extern const char kComfortNoiseCodecName[]; RTC_EXPORT extern const char kVp8CodecName[]; RTC_EXPORT extern const char 
kVp9CodecName[]; RTC_EXPORT extern const char kH264CodecName[]; - +#ifndef DISABLE_H265 +RTC_EXPORT extern const char kH265CodecName[]; +#endif // RFC 6184 RTP Payload Format for H.264 video RTC_EXPORT extern const char kH264FmtpProfileLevelId[]; RTC_EXPORT extern const char kH264FmtpLevelAsymmetryAllowed[]; @@ -141,6 +143,13 @@ RTC_EXPORT extern const char kH264FmtpPacketizationMode[]; extern const char kH264FmtpSpropParameterSets[]; extern const char kH264ProfileLevelConstrainedBaseline[]; +#ifndef DISABLE_H265 +// RFC 7798 RTP Payload Format for H.265 video +RTC_EXPORT extern const char kH265FmtpProfileSpace[]; +RTC_EXPORT extern const char kH265FmtpProfileId[]; +RTC_EXPORT extern const char kH265FmtpTierFlag[]; +RTC_EXPORT extern const char kH265FmtpLevelId[]; +#endif extern const int kDefaultVideoMaxFramerate; extern const size_t kConferenceMaxNumSpatialLayers; diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn index 02c7207279b..eec93dd947b 100644 --- a/modules/rtp_rtcp/BUILD.gn +++ b/modules/rtp_rtcp/BUILD.gn @@ -197,6 +197,17 @@ rtc_static_library("rtp_rtcp") { defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0" ] } + if (rtc_use_h265) { + sources += [ + "source/rtp_format_h265.cc", + "source/rtp_format_h265.h", + ] + } + + if (!rtc_use_h265) { + defines += ["DISABLE_H265"] + } + deps = [ ":rtp_rtcp_format", ":rtp_video_header", diff --git a/modules/rtp_rtcp/source/rtp_format.cc b/modules/rtp_rtcp/source/rtp_format.cc index e870232a4d5..84177117c67 100644 --- a/modules/rtp_rtcp/source/rtp_format.cc +++ b/modules/rtp_rtcp/source/rtp_format.cc @@ -13,10 +13,16 @@ #include "absl/memory/memory.h" #include "absl/types/variant.h" #include "modules/rtp_rtcp/source/rtp_format_h264.h" +#ifndef DISABLE_H265 +#include "modules/rtp_rtcp/source/rtp_format_h265.h" +#endif #include "modules/rtp_rtcp/source/rtp_format_video_generic.h" #include "modules/rtp_rtcp/source/rtp_format_vp8.h" #include "modules/rtp_rtcp/source/rtp_format_vp9.h" #include 
"modules/video_coding/codecs/h264/include/h264_globals.h" +#ifndef DISABLE_H265 +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#endif #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" #include "rtc_base/checks.h" @@ -44,6 +50,15 @@ std::unique_ptr RtpPacketizer::Create( return absl::make_unique( payload, limits, h264.packetization_mode, *fragmentation); } +#ifndef DISABLE_H265 + case kVideoCodecH265: { + RTC_CHECK(fragmentation); + const auto& h265 = + absl::get(rtp_video_header.video_type_header); + return absl::make_unique( + payload, limits, h265.packetization_mode, *fragmentation); + } +#endif case kVideoCodecVP8: { const auto& vp8 = absl::get(rtp_video_header.video_type_header); @@ -145,6 +160,10 @@ RtpDepacketizer* RtpDepacketizer::Create(absl::optional type) { } switch (*type) { +#ifndef DISABLE_H265 + case kVideoCodecH265: + return new RtpDepacketizerH265(); +#endif case kVideoCodecH264: return new RtpDepacketizerH264(); case kVideoCodecVP8: diff --git a/modules/rtp_rtcp/source/rtp_format_h265.cc b/modules/rtp_rtcp/source/rtp_format_h265.cc new file mode 100644 index 00000000000..393b3aab935 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_format_h265.cc @@ -0,0 +1,645 @@ +/* + * Intel License + */ + +#include + +#include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "modules/rtp_rtcp/source/rtp_format_h265.h" +#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "rtc_base/logging.h" +#include "rtc_base/system/fallthrough.h" +using namespace rtc; + +namespace webrtc { +namespace { + +enum NaluType { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, 
+ kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kVps = 32, + kHevcSps = 33, + kHevcPps = 34, + kHevcAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kHevcAp = 48, + kHevcFu = 49 +}; + +/* + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | PayloadHdr (Type=49) | FU header | DONL (cond) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| +*/ +// Unlike H.264, HEVC NAL header is 2-bytes. +static const size_t kHevcNalHeaderSize = 2; +// H.265's FU is constructed of 2-byte payload header, and 1-byte FU header +static const size_t kHevcFuHeaderSize = 1; +static const size_t kHevcLengthFieldSize = 2; +static const size_t kHevcApHeaderSize = + kHevcNalHeaderSize + kHevcLengthFieldSize; + +enum HevcNalHdrMasks { + kHevcFBit = 0x80, + kHevcTypeMask = 0x7E, + kHevcLayerIDHMask = 0x1, + kHevcLayerIDLMask = 0xF8, + kHevcTIDMask = 0x7, + kHevcTypeMaskN = 0x81, + kHevcTypeMaskInFuHeader = 0x3F +}; + +// Bit masks for FU headers. +enum HevcFuDefs { kHevcSBit = 0x80, kHevcEBit = 0x40, kHevcFuTypeBit = 0x3F }; + +// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer. +bool ParseApStartOffsets(const uint8_t* nalu_ptr, + size_t length_remaining, + std::vector* offsets) { + size_t offset = 0; + while (length_remaining > 0) { + // Buffer doesn't contain room for additional nalu length. 
+ if (length_remaining < sizeof(uint16_t)) + return false; + uint16_t nalu_size = ByteReader::ReadBigEndian(nalu_ptr); + nalu_ptr += sizeof(uint16_t); + length_remaining -= sizeof(uint16_t); + if (nalu_size > length_remaining) + return false; + nalu_ptr += nalu_size; + length_remaining -= nalu_size; + + offsets->push_back(offset + kHevcApHeaderSize); + offset += kHevcLengthFieldSize + nalu_size; + } + return true; +} + +} // namespace + +RtpPacketizerH265::RtpPacketizerH265( + rtc::ArrayView payload, + PayloadSizeLimits limits, + H265PacketizationMode packetization_mode, + const RTPFragmentationHeader& fragmentation) + : limits_(limits), + num_packets_left_(0) { + // Guard against uninitialized memory in packetization_mode. + RTC_CHECK(packetization_mode == H265PacketizationMode::NonInterleaved || + packetization_mode == H265PacketizationMode::SingleNalUnit); + + for (size_t i = 0; i < fragmentation.fragmentationVectorSize; ++i) { + const uint8_t* fragment = + payload.data() + fragmentation.fragmentationOffset[i]; + const size_t fragment_length = fragmentation.fragmentationLength[i]; + input_fragments_.push_back(Fragment(fragment, fragment_length)); + } + + if (!GeneratePackets(packetization_mode)) { + // If failed to generate all the packets, discard already generated + // packets in case the caller would ignore return value and still try to + // call NextPacket(). 
+    num_packets_left_ = 0;
+    while (!packets_.empty()) {
+      packets_.pop();
+    }
+  }
+}
+
+RtpPacketizerH265::~RtpPacketizerH265() {}
+
+size_t RtpPacketizerH265::NumPackets() const {
+  return num_packets_left_;
+}
+
+RtpPacketizerH265::Fragment::Fragment(const uint8_t* buffer, size_t length)
+    : buffer(buffer), length(length) {}
+RtpPacketizerH265::Fragment::Fragment(const Fragment& fragment)
+    : buffer(fragment.buffer), length(fragment.length) {}
+
+
+// Queues packet units for every input fragment: a fragment that fits into one
+// packet is sent as a single NAL unit, anything larger is split into FUs.
+// Returns false as soon as one fragment cannot be packetized, so that the
+// constructor can discard the partially generated packets.
+bool RtpPacketizerH265::GeneratePackets(
+    H265PacketizationMode packetization_mode) {
+  // For HEVC we follow non-interleaved mode for the packetization,
+  // and don't support single-nalu mode at present.
+  for (size_t i = 0; i < input_fragments_.size(); ++i) {
+    const size_t fragment_len = input_fragments_[i].length;
+    int single_packet_capacity = limits_.max_payload_len;
+    if (input_fragments_.size() == 1)
+      single_packet_capacity -= limits_.single_packet_reduction_len;
+    else if (i == 0)
+      single_packet_capacity -= limits_.first_packet_reduction_len;
+    else if (i + 1 == input_fragments_.size()) {
+      // Pretend that last fragment is larger instead of making last packet
+      // smaller.
+      single_packet_capacity -= limits_.last_packet_reduction_len;
+    }
+    // Check the sign before comparing with the unsigned length; a negative
+    // capacity would otherwise be converted to a huge unsigned value and
+    // every fragment would appear to fit.
+    const bool fits_in_single_packet =
+        single_packet_capacity >= 0 &&
+        fragment_len <= static_cast<size_t>(single_packet_capacity);
+    const bool packetized =
+        fits_in_single_packet ? PacketizeSingleNalu(i) : PacketizeFu(i);
+    if (!packetized)
+      return false;
+  }
+  return true;
+}
+
+// Splits the NAL unit at |fragment_index| into FU packet units of about equal
+// size. Returns false if the NAL unit has no payload behind its header or if
+// the size limits leave no room for a split.
+bool RtpPacketizerH265::PacketizeFu(size_t fragment_index) {
+  // Fragment payload into packets (FU).
+  // Strip out the original header and leave room for the FU header.
+  const Fragment& fragment = input_fragments_[fragment_index];
+  if (fragment.length <= kHevcNalHeaderSize) {
+    RTC_LOG(LS_ERROR) << "NAL unit too short to be fragmented.";
+    return false;
+  }
+  PayloadSizeLimits limits = limits_;
+  // Every FU packet is written as a 2-byte PayloadHdr plus a 1-byte FU header
+  // in front of the payload chunk, so all three bytes have to be reserved.
+  limits.max_payload_len -= kHevcFuHeaderSize + kHevcNalHeaderSize;
+
+  // Update single/first/last packet reductions unless it is single/first/last
+  // fragment.
+  if (input_fragments_.size() != 1) {
+    // if this fragment is put into a single packet, it might still be the
+    // first or the last packet in the whole sequence of packets.
+    if (fragment_index == input_fragments_.size() - 1) {
+      limits.single_packet_reduction_len = limits_.last_packet_reduction_len;
+    } else if (fragment_index == 0) {
+      limits.single_packet_reduction_len = limits_.first_packet_reduction_len;
+    } else {
+      limits.single_packet_reduction_len = 0;
+    }
+  }
+  if (fragment_index != 0)
+    limits.first_packet_reduction_len = 0;
+  if (fragment_index != input_fragments_.size() - 1)
+    limits.last_packet_reduction_len = 0;
+
+  // Strip out the original header.
+  size_t payload_left = fragment.length - kHevcNalHeaderSize;
+  int offset = kHevcNalHeaderSize;
+
+  std::vector<int> payload_sizes = SplitAboutEqually(payload_left, limits);
+  if (payload_sizes.empty())
+    return false;
+
+  // Keep both bytes of the original NAL unit header: the FU writer reads the
+  // high byte (F bit, type, top layer-id bit) and the low byte (layer id,
+  // TID) separately.
+  const uint16_t nalu_header =
+      static_cast<uint16_t>((fragment.buffer[0] << 8) | fragment.buffer[1]);
+
+  for (size_t i = 0; i < payload_sizes.size(); ++i) {
+    int packet_length = payload_sizes[i];
+    RTC_CHECK_GT(packet_length, 0);
+    packets_.push(PacketUnit(Fragment(fragment.buffer + offset, packet_length),
+                             /*first_fragment=*/i == 0,
+                             /*last_fragment=*/i == payload_sizes.size() - 1,
+                             false, nalu_header));
+    offset += packet_length;
+    payload_left -= packet_length;
+  }
+  num_packets_left_ += payload_sizes.size();
+  RTC_CHECK_EQ(0, payload_left);
+  return true;
+}
+
+
+bool RtpPacketizerH265::PacketizeSingleNalu(size_t fragment_index) {
+  // Add a single NALU to the queue, no aggregation.
+  size_t payload_size_left = limits_.max_payload_len;
+  if (input_fragments_.size() == 1)
+    payload_size_left -= limits_.single_packet_reduction_len;
+  else if (fragment_index == 0)
+    payload_size_left -= limits_.first_packet_reduction_len;
+  else if (fragment_index + 1 == input_fragments_.size())
+    payload_size_left -= limits_.last_packet_reduction_len;
+  const Fragment* fragment = &input_fragments_[fragment_index];
+  if (payload_size_left < fragment->length) {
+    RTC_LOG(LS_ERROR) << "Failed to fit a fragment to packet in SingleNalu "
+                         "packetization mode.
Payload size left " + << payload_size_left << ", fragment length " + << fragment->length << ", packet capacity " + << limits_.max_payload_len; + return false; + } + RTC_CHECK_GT(fragment->length, 0u); + packets_.push(PacketUnit(*fragment, true /* first */, true /* last */, + false /* aggregated */, fragment->buffer[0])); + ++num_packets_left_; + return true; +} + +int RtpPacketizerH265::PacketizeAp(size_t fragment_index) { + // Aggregate fragments into one packet (STAP-A). + size_t payload_size_left = limits_.max_payload_len; + if (input_fragments_.size() == 1) + payload_size_left -= limits_.single_packet_reduction_len; + else if (fragment_index == 0) + payload_size_left -= limits_.first_packet_reduction_len; + int aggregated_fragments = 0; + size_t fragment_headers_length = 0; + const Fragment* fragment = &input_fragments_[fragment_index]; + RTC_CHECK_GE(payload_size_left, fragment->length); + ++num_packets_left_; + + auto payload_size_needed = [&] { + size_t fragment_size = fragment->length + fragment_headers_length; + if (input_fragments_.size() == 1) { + // Single fragment, single packet, payload_size_left already adjusted + // with limits_.single_packet_reduction_len. + return fragment_size; + } + if (fragment_index == input_fragments_.size() - 1) { + // Last fragment, so StrapA might be the last packet. + return fragment_size + limits_.last_packet_reduction_len; + } + return fragment_size; + }; + + while (payload_size_left >= payload_size_needed()) { + RTC_CHECK_GT(fragment->length, 0); + packets_.push(PacketUnit(*fragment, aggregated_fragments == 0, false, true, + fragment->buffer[0])); + payload_size_left -= fragment->length; + payload_size_left -= fragment_headers_length; + + fragment_headers_length = kHevcLengthFieldSize; + // If we are going to try to aggregate more fragments into this packet + // we need to add the STAP-A NALU header and a length field for the first + // NALU of this packet. 
+ if (aggregated_fragments == 0) + fragment_headers_length += kHevcNalHeaderSize + kHevcLengthFieldSize; + ++aggregated_fragments; + + // Next fragment. + ++fragment_index; + if (fragment_index == input_fragments_.size()) + break; + fragment = &input_fragments_[fragment_index]; + } + RTC_CHECK_GT(aggregated_fragments, 0); + packets_.back().last_fragment = true; + return fragment_index; +} + +bool RtpPacketizerH265::NextPacket(RtpPacketToSend* rtp_packet) { + RTC_DCHECK(rtp_packet); + + if (packets_.empty()) { + return false; + } + + PacketUnit packet = packets_.front(); + + if (packet.first_fragment && packet.last_fragment) { + // Single NAL unit packet. + size_t bytes_to_send = packet.source_fragment.length; + uint8_t* buffer = rtp_packet->AllocatePayload(bytes_to_send); + memcpy(buffer, packet.source_fragment.buffer, bytes_to_send); + packets_.pop(); + input_fragments_.pop_front(); + } else if (packet.aggregated) { + bool is_last_packet = num_packets_left_ == 1; + NextAggregatePacket(rtp_packet, is_last_packet); + } else { + NextFragmentPacket(rtp_packet); + } + rtp_packet->SetMarker(packets_.empty()); + --num_packets_left_; + return true; +} + +void RtpPacketizerH265::NextAggregatePacket(RtpPacketToSend* rtp_packet, + bool last) { + size_t payload_capacity = rtp_packet->FreeCapacity(); + RTC_CHECK_GE(payload_capacity, kHevcNalHeaderSize); + uint8_t* buffer = rtp_packet->AllocatePayload(payload_capacity); + + PacketUnit* packet = &packets_.front(); + RTC_CHECK(packet->first_fragment); + uint8_t payload_hdr_h = packet->header >> 8; + uint8_t payload_hdr_l = packet->header & 0xFF; + uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask; + + payload_hdr_h = + (payload_hdr_h & kHevcTypeMaskN) | (kHevcAp << 1) | layer_id_h; + + buffer[0] = payload_hdr_h; + buffer[1] = payload_hdr_l; + int index = kHevcNalHeaderSize; + bool is_last_fragment = packet->last_fragment; + while (packet->aggregated) { + // Add NAL unit length field. 
+ const Fragment& fragment = packet->source_fragment; + ByteWriter::WriteBigEndian(&buffer[index], fragment.length); + index += kHevcLengthFieldSize; + // Add NAL unit. + memcpy(&buffer[index], fragment.buffer, fragment.length); + index += fragment.length; + packets_.pop(); + input_fragments_.pop_front(); + if (is_last_fragment) + break; + packet = &packets_.front(); + is_last_fragment = packet->last_fragment; + } + RTC_CHECK(is_last_fragment); + rtp_packet->SetPayloadSize(index); +} + +void RtpPacketizerH265::NextFragmentPacket(RtpPacketToSend* rtp_packet) { + PacketUnit* packet = &packets_.front(); + // NAL unit fragmented over multiple packets (FU). + // We do not send original NALU header, so it will be replaced by the + // PayloadHdr of the first packet. + uint8_t payload_hdr_h = + packet->header >> 8; // 1-bit F, 6-bit type, 1-bit layerID highest-bit + uint8_t payload_hdr_l = packet->header & 0xFF; + uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask; + uint8_t fu_header = 0; + // S | E |6 bit type. + fu_header |= (packet->first_fragment ? kHevcSBit : 0); + fu_header |= (packet->last_fragment ? kHevcEBit : 0); + uint8_t type = (payload_hdr_h & kHevcTypeMask) >> 1; + fu_header |= type; + // Now update payload_hdr_h with FU type. 
+  payload_hdr_h =
+      (payload_hdr_h & kHevcTypeMaskN) | (kHevcFu << 1) | layer_id_h;
+  const Fragment& fragment = packet->source_fragment;
+  uint8_t* buffer = rtp_packet->AllocatePayload(
+      kHevcFuHeaderSize + kHevcNalHeaderSize + fragment.length);
+  buffer[0] = payload_hdr_h;
+  buffer[1] = payload_hdr_l;
+  buffer[2] = fu_header;
+
+  // The payload chunk is copied the same way for every FU of the NAL unit.
+  memcpy(buffer + kHevcFuHeaderSize + kHevcNalHeaderSize, fragment.buffer,
+         fragment.length);
+  // The last FU finishes the input fragment, so release it like the single
+  // NAL unit and aggregation paths do.
+  if (packet->last_fragment)
+    input_fragments_.pop_front();
+  packets_.pop();
+}
+
+// Parses one RTP payload. On success |parsed_payload| describes the packet and
+// points at the NAL unit data; returns false for empty or malformed payloads.
+bool RtpDepacketizerH265::Parse(ParsedPayload* parsed_payload,
+                                const uint8_t* payload_data,
+                                size_t payload_data_length) {
+  RTC_CHECK(parsed_payload != nullptr);
+  if (payload_data_length == 0) {
+    RTC_LOG(LS_ERROR) << "Empty payload.";
+    return false;
+  }
+
+  offset_ = 0;
+  length_ = payload_data_length;
+  modified_buffer_.reset();
+
+  uint8_t nal_type = (payload_data[0] & kHevcTypeMask) >> 1;
+  parsed_payload->video_header()
+      .video_type_header.emplace<RTPVideoHeaderH265>();
+
+  if (nal_type == H265::NaluType::kFU) {
+    // Fragmentation units (FU).
+    if (!ParseFuNalu(parsed_payload, payload_data))
+      return false;
+  } else {
+    // We handle APs and single NALU's the same way here. The jitter buffer
+    // will depacketize the AP into NAL units later.
+    // TODO(sprang): Parse AP offsets here and store in fragmentation vec.
+    if (!ProcessApOrSingleNalu(parsed_payload, payload_data))
+      return false;
+  }
+
+  const uint8_t* payload =
+      modified_buffer_ ?
modified_buffer_->data() : payload_data; + + parsed_payload->payload = payload + offset_; + parsed_payload->payload_length = length_; + return true; +} + +bool RtpDepacketizerH265::ProcessApOrSingleNalu( + RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + parsed_payload->video_header().width = 0; + parsed_payload->video_header().height = 0; + parsed_payload->video_header().codec = kVideoCodecH265; + parsed_payload->video_header().is_first_packet_in_frame = true; + auto& h265_header = absl::get( + parsed_payload->video_header().video_type_header); + + const uint8_t* nalu_start = payload_data + kHevcNalHeaderSize; + const size_t nalu_length = length_ - kHevcNalHeaderSize; + uint8_t nal_type = (payload_data[0] & kHevcTypeMask) >> 1; + std::vector nalu_start_offsets; + if (nal_type == H265::NaluType::kAP) { + // Skip the StapA header (StapA NAL type + length). + if (length_ <= kHevcApHeaderSize) { + RTC_LOG(LS_ERROR) << "AP header truncated."; + return false; + } + + if (!ParseApStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { + RTC_LOG(LS_ERROR) << "AP packet with incorrect NALU packet lengths."; + return false; + } + + h265_header.packetization_type = kH265AP; + // nal_type = (payload_data[kHevcApHeaderSize] & kHevcTypeMask) >> 1; + } else { + h265_header.packetization_type = kH265SingleNalu; + nalu_start_offsets.push_back(0); + } + h265_header.nalu_type = nal_type; + parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta; + + nalu_start_offsets.push_back(length_ + kHevcLengthFieldSize); // End offset. + for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { + size_t start_offset = nalu_start_offsets[i]; + // End offset is actually start offset for next unit, excluding length field + // so remove that from this units length. + size_t end_offset = nalu_start_offsets[i + 1] - kHevcLengthFieldSize; + if (end_offset - start_offset < kHevcNalHeaderSize) { // Same as H.264. 
+      RTC_LOG(LS_ERROR) << "AP packet too short";
+      return false;
+    }
+
+    // Ids stay at -1 unless the NAL unit type below carries them.
+    H265NaluInfo nalu;
+    nalu.type = (payload_data[start_offset] & kHevcTypeMask) >> 1;
+    nalu.vps_id = -1;
+    nalu.sps_id = -1;
+    nalu.pps_id = -1;
+    // Parameter sets and slices are parsed from behind the NAL unit header.
+    start_offset += kHevcNalHeaderSize;
+
+    switch (nalu.type) {
+      case H265::NaluType::kVps: {
+        absl::optional<H265VpsParser::VpsState> vps = H265VpsParser::ParseVps(
+            &payload_data[start_offset], end_offset - start_offset);
+        if (vps) {
+          nalu.vps_id = vps->id;
+        } else {
+          RTC_LOG(LS_WARNING) << "Failed to parse VPS id from VPS slice.";
+        }
+        break;
+      }
+      case H265::NaluType::kSps: {
+        // The SPS provides the resolution, its own id and the id of the VPS
+        // it refers to. The payload itself is passed on unmodified.
+        absl::optional<H265SpsParser::SpsState> sps = H265SpsParser::ParseSps(
+            &payload_data[start_offset], end_offset - start_offset);
+
+        if (sps) {
+          parsed_payload->video_header().width = sps->width;
+          parsed_payload->video_header().height = sps->height;
+          nalu.sps_id = sps->id;
+          nalu.vps_id = sps->vps_id;
+        } else {
+          RTC_LOG(LS_WARNING)
+              << "Failed to parse SPS and VPS id from SPS slice.";
+        }
+        // A packet carrying an SPS is reported as part of a key frame.
+        parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
+        break;
+      }
+      case H265::NaluType::kPps: {
+        uint32_t pps_id;
+        uint32_t sps_id;
+        if (H265PpsParser::ParsePpsIds(&payload_data[start_offset],
+                                       end_offset - start_offset, &pps_id,
+                                       &sps_id)) {
+          nalu.pps_id = pps_id;
+          nalu.sps_id = sps_id;
+        } else {
+          RTC_LOG(LS_WARNING)
+              << "Failed to parse PPS id and SPS id from PPS slice.";
+        }
+        break;
+      }
+      case H265::NaluType::kIdr:
+        parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
+        RTC_FALLTHROUGH();
+      case H265::NaluType::kTrailN:
+      case H265::NaluType::kTrailR: {
+        absl::optional<uint32_t> pps_id =
+            H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
+                &payload_data[start_offset],
end_offset - start_offset, + nalu.type); + if (pps_id) { + nalu.pps_id = *pps_id; + } else { + RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " + << static_cast(nalu.type); + } + break; + } + // Slices below don't contain SPS or PPS ids. + case H265::NaluType::kAud: + case H265::NaluType::kTsaN: + case H265::NaluType::kTsaR: + case H265::NaluType::kStsaN: + case H265::NaluType::kStsaR: + case H265::NaluType::kRadlN: + case H265::NaluType::kRadlR: + case H265::NaluType::kBlaWLp: + case H265::NaluType::kBlaWRadl: + case H265::NaluType::kIdrWRadl: + case H265::NaluType::kPrefixSei: + case H265::NaluType::kSuffixSei: + break; + case H265::NaluType::kAP: + case H265::NaluType::kFU: + RTC_LOG(LS_WARNING) << "Unexpected AP or FU received."; + return false; + } + + if (h265_header.nalus_length == kMaxNalusPerPacket) { + RTC_LOG(LS_WARNING) + << "Received packet containing more than " << kMaxNalusPerPacket + << " NAL units. Will not keep track sps and pps ids for all of them."; + } else { + h265_header.nalus[h265_header.nalus_length++] = nalu; + } + } + return true; +} + +bool RtpDepacketizerH265::ParseFuNalu( + RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + if (length_ < kHevcFuHeaderSize + kHevcNalHeaderSize) { + RTC_LOG(LS_ERROR) << "FU NAL units truncated."; + return false; + } + uint8_t f = payload_data[0] & kHevcFBit; + uint8_t layer_id_h = payload_data[0] & kHevcLayerIDHMask; + uint8_t layer_id_l_unshifted = payload_data[1] & kHevcLayerIDLMask; + uint8_t tid = payload_data[1] & kHevcTIDMask; + + uint8_t original_nal_type = payload_data[2] & kHevcTypeMaskInFuHeader; + bool first_fragment = payload_data[2] & kHevcSBit; + H265NaluInfo nalu; + nalu.type = original_nal_type; + nalu.vps_id = -1; + nalu.sps_id = -1; + nalu.pps_id = -1; + if (first_fragment) { + offset_ = 1; + length_ -= 1; + absl::optional pps_id = + H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp( + payload_data + kHevcNalHeaderSize + 
kHevcFuHeaderSize, + length_ - kHevcFuHeaderSize, nalu.type); + if (pps_id) { + nalu.pps_id = *pps_id; + } else { + RTC_LOG(LS_WARNING) + << "Failed to parse PPS from first fragment of FU NAL " + "unit with original type: " + << static_cast(nalu.type); + } + uint8_t* payload = const_cast(payload_data + offset_); + payload[0] = f | original_nal_type << 1 | layer_id_h; + payload[1] = layer_id_l_unshifted | tid; + } else { + offset_ = kHevcNalHeaderSize + kHevcFuHeaderSize; + length_ -= (kHevcNalHeaderSize + kHevcFuHeaderSize); + } + + if (original_nal_type == H265::NaluType::kIdr) { + parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey; + } else { + parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta; + } + parsed_payload->video_header().width = 0; + parsed_payload->video_header().height = 0; + parsed_payload->video_header().codec = kVideoCodecH265; + parsed_payload->video_header().is_first_packet_in_frame = first_fragment; + auto& h265_header = absl::get( + parsed_payload->video_header().video_type_header); + h265_header.packetization_type = kH265FU; + h265_header.nalu_type = original_nal_type; + if (first_fragment) { + h265_header.nalus[h265_header.nalus_length] = nalu; + h265_header.nalus_length = 1; + } + return true; +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_format_h265.h b/modules/rtp_rtcp/source/rtp_format_h265.h new file mode 100644 index 00000000000..a5b64a0dd42 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_format_h265.h @@ -0,0 +1,129 @@ +/* + * Intel License + */ + +#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ +#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ + +#include +#include +#include "api/array_view.h" +#include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/rtp_format.h" +#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "modules/rtp_rtcp/source/rtp_format.h" +#include 
"modules/video_coding/codecs/h265/include/h265_globals.h" +#include "rtc_base/buffer.h" +#include "rtc_base/constructor_magic.h" + +namespace webrtc { + +class RtpPacketizerH265 : public RtpPacketizer { + public: + // Initialize with payload from encoder. + // The payload_data must be exactly one encoded H.265 frame. + RtpPacketizerH265(rtc::ArrayView payload, + PayloadSizeLimits limits, + H265PacketizationMode packetization_mode, + const RTPFragmentationHeader& fragmentation); + + ~RtpPacketizerH265() override; + + size_t NumPackets() const override; + + // Get the next payload with H.265 payload header. + // buffer is a pointer to where the output will be written. + // bytes_to_send is an output variable that will contain number of bytes + // written to buffer. The parameter last_packet is true for the last packet of + // the frame, false otherwise (i.e., call the function again to get the + // next packet). + // Returns true on success or false if there was no payload to packetize. + bool NextPacket(RtpPacketToSend* rtp_packet) override; + + private: + struct Packet { + Packet(size_t offset, + size_t size, + bool first_fragment, + bool last_fragment, + bool aggregated, + uint16_t header) + : offset(offset), + size(size), + first_fragment(first_fragment), + last_fragment(last_fragment), + aggregated(aggregated), + header(header) {} + + size_t offset; + size_t size; + bool first_fragment; + bool last_fragment; + bool aggregated; + uint16_t header; // Different from H264 + }; + struct Fragment { + Fragment(const uint8_t* buffer, size_t length); + explicit Fragment(const Fragment& fragment); + const uint8_t* buffer = nullptr; + size_t length = 0; + std::unique_ptr tmp_buffer; + }; + struct PacketUnit { + PacketUnit(const Fragment& source_fragment, + bool first_fragment, + bool last_fragment, + bool aggregated, + uint16_t header) + : source_fragment(source_fragment), + first_fragment(first_fragment), + last_fragment(last_fragment), + aggregated(aggregated), + 
header(header) {} + + const Fragment source_fragment; + bool first_fragment; + bool last_fragment; + bool aggregated; + uint16_t header; + }; + typedef std::queue PacketQueue; + std::deque input_fragments_; + std::queue packets_; + + bool GeneratePackets(H265PacketizationMode packetization_mode); + bool PacketizeFu(size_t fragment_index); + int PacketizeAp(size_t fragment_index); + bool PacketizeSingleNalu(size_t fragment_index); + + void NextAggregatePacket(RtpPacketToSend* rtp_packet, bool last); + void NextFragmentPacket(RtpPacketToSend* rtp_packet); + + const PayloadSizeLimits limits_; + size_t num_packets_left_; + RTPFragmentationHeader fragmentation_; + + RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH265); +}; + +// Depacketizer for H.265. +class RtpDepacketizerH265 : public RtpDepacketizer { + public: + virtual ~RtpDepacketizerH265() {} + + bool Parse(ParsedPayload* parsed_payload, + const uint8_t* payload_data, + size_t payload_data_length) override; + + private: + bool ParseFuNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + bool ProcessApOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + + size_t offset_; + size_t length_; + std::unique_ptr modified_buffer_; +}; +} // namespace webrtc +#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_ diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc index b69b0d5542e..4537f3ff4e5 100644 --- a/modules/rtp_rtcp/source/rtp_sender_video.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video.cc @@ -229,6 +229,10 @@ void RTPSenderVideo::RegisterPayloadType(int8_t payload_type, video_type = kVideoCodecVP9; } else if (absl::EqualsIgnoreCase(payload_name, "H264")) { video_type = kVideoCodecH264; +#ifndef DISABLE_H265 + } else if (absl::EqualsIgnoreCase(payload_name, "H265")) { + video_type = kVideoCodecH265; +#endif } else { video_type = kVideoCodecGeneric; } @@ -448,8 +452,9 @@ bool 
RTPSenderVideo::SendVideo(VideoFrameType frame_type, int32_t retransmission_settings; bool set_video_rotation; bool set_color_space = false; - bool set_frame_marking = video_header->codec == kVideoCodecH264 && - video_header->frame_marking.temporal_id != kNoTemporalIdx; + bool set_frame_marking = + video_header->codec == kVideoCodecH264 && + video_header->frame_marking.temporal_id != kNoTemporalIdx; const absl::optional playout_delay = playout_delay_oracle_->PlayoutDelayToSend(video_header->playout_delay); @@ -817,6 +822,9 @@ uint8_t RTPSenderVideo::GetTemporalId(const RTPVideoHeader& header) { return vp9.temporal_idx; } uint8_t operator()(const RTPVideoHeaderH264&) { return kNoTemporalIdx; } +#ifndef DISABLE_H265 + uint8_t operator()(const RTPVideoHeaderH265&) { return kNoTemporalIdx; } +#endif uint8_t operator()(const absl::monostate&) { return kNoTemporalIdx; } }; switch (header.codec) { diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h index 78f8c4b2d19..e6daece5c08 100644 --- a/modules/rtp_rtcp/source/rtp_video_header.h +++ b/modules/rtp_rtcp/source/rtp_video_header.h @@ -24,14 +24,25 @@ #include "api/video/video_timing.h" #include "common_types.h" // NOLINT(build/include) #include "modules/video_coding/codecs/h264/include/h264_globals.h" +#ifndef DISABLE_H265 +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#endif #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" namespace webrtc { +#ifndef DISABLE_H265 +using RTPVideoTypeHeader = absl::variant; +#else using RTPVideoTypeHeader = absl::variant; +#endif struct RTPVideoHeader { struct GenericDescriptorInfo { diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 49d39a9c657..f73a04aa61b 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -140,6 +140,13 @@ rtc_static_library("video_coding") { 
"video_receiver.cc", ] + if (rtc_use_h265) { + sources += [ + "h265_vps_sps_pps_tracker.cc", + "h265_vps_sps_pps_tracker.h", + ] + } + deps += [ ":codec_globals_headers", ":encoded_frame", diff --git a/modules/video_coding/codecs/h265/include/h265_globals.h b/modules/video_coding/codecs/h265/include/h265_globals.h new file mode 100644 index 00000000000..bc0eef236cf --- /dev/null +++ b/modules/video_coding/codecs/h265/include/h265_globals.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains codec dependent definitions that are needed in +// order to compile the WebRTC codebase, even if this codec is not used. + +#ifndef MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ +#define MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ + +#ifndef DISABLE_H265 + +#include "modules/video_coding/codecs/h264/include/h264_globals.h" + +namespace webrtc { + +// The packetization types that we support: single, aggregated, and fragmented. +enum H265PacketizationTypes { + kH265SingleNalu, // This packet contains a single NAL unit. + kH265AP, // This packet contains aggregation Packet. + // If this packet has an associated NAL unit type, + // it'll be for the first such aggregated packet. + kH265FU, // This packet contains a FU (fragmentation + // unit) packet, meaning it is a part of a frame + // that was too large to fit into a single packet. 
+}; + +struct H265NaluInfo { + uint8_t type; + int vps_id; + int sps_id; + int pps_id; +}; + +enum class H265PacketizationMode { + NonInterleaved = 0, // Mode 1 - STAP-A, FU-A is allowed + SingleNalUnit // Mode 0 - only single NALU allowed +}; + +struct RTPVideoHeaderH265 { + // The NAL unit type. If this is a header for a fragmented packet, it's the + // NAL unit type of the original data. If this is the header for an aggregated + // packet, it's the NAL unit type of the first NAL unit in the packet. + uint8_t nalu_type; + H265PacketizationTypes packetization_type; + H265NaluInfo nalus[kMaxNalusPerPacket]; + size_t nalus_length; + // The packetization type of this buffer - single, aggregated or fragmented. + H265PacketizationMode packetization_mode; +}; + +} // namespace webrtc + +#endif + +#endif // MODULES_VIDEO_CODING_CODECS_H265_INCLUDE_H265_GLOBALS_H_ diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc index 8c866995f45..f9f297d9bfb 100644 --- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc +++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc @@ -89,6 +89,12 @@ int MultiplexEncoderAdapter::InitEncode(const VideoCodec* inst, key_frame_interval_ = settings.H264()->keyFrameInterval; settings.H264()->keyFrameInterval = 0; break; +#ifndef DISABLE_H265 + case kVideoCodecH265: + key_frame_interval_ = settings.H265()->keyFrameInterval; + settings.H265()->keyFrameInterval = 0; + break; +#endif default: break; } diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc index a53f88dcb01..31eff9a099d 100644 --- a/modules/video_coding/encoded_frame.cc +++ b/modules/video_coding/encoded_frame.cc @@ -151,6 +151,12 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) { } break; } +#ifndef DISABLE_H265 + case kVideoCodecH265: { + _codecSpecificInfo.codecType = kVideoCodecH265; + break; + 
} +#endif default: { _codecSpecificInfo.codecType = kVideoCodecGeneric; break; diff --git a/modules/video_coding/h265_vps_sps_pps_tracker.cc b/modules/video_coding/h265_vps_sps_pps_tracker.cc new file mode 100644 index 00000000000..088feac3412 --- /dev/null +++ b/modules/video_coding/h265_vps_sps_pps_tracker.cc @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/h265_vps_sps_pps_tracker.h" + +#include +#include + +#include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "modules/video_coding/codecs/h264/include/h264_globals.h" +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#include "modules/video_coding/frame_object.h" +#include "modules/video_coding/packet_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace video_coding { + +namespace { +const uint8_t start_code_h265[] = {0, 0, 0, 1}; +} // namespace + +H265VpsSpsPpsTracker::PacketAction H265VpsSpsPpsTracker::CopyAndFixBitstream( + VCMPacket* packet) { + RTC_DCHECK(packet->codec() == kVideoCodecH265); + + const uint8_t* data = packet->dataPtr; + const size_t data_size = packet->sizeBytes; + const RTPVideoHeader& video_header = packet->video_header; + auto& h265_header = + absl::get(packet->video_header.video_type_header); + + bool append_vps_sps_pps = false; + auto vps = vps_data_.end(); + auto sps = sps_data_.end(); + auto pps = pps_data_.end(); + + for 
(size_t i = 0; i < h265_header.nalus_length; ++i) { + const H265NaluInfo& nalu = h265_header.nalus[i]; + switch (nalu.type) { + case H265::NaluType::kVps: { + vps_data_[nalu.vps_id].size = 0; + break; + } + case H265::NaluType::kSps: { + sps_data_[nalu.sps_id].vps_id = nalu.vps_id; + sps_data_[nalu.sps_id].width = packet->width(); + sps_data_[nalu.sps_id].height = packet->height(); + break; + } + case H265::NaluType::kPps: { + pps_data_[nalu.pps_id].sps_id = nalu.sps_id; + break; + } + case H265::NaluType::kIdr: { + // If this is the first packet of an IDR, make sure we have the required + // SPS/PPS and also calculate how much extra space we need in the buffer + // to prepend the SPS/PPS to the bitstream with start codes. + if (video_header.is_first_packet_in_frame) { + if (nalu.pps_id == -1) { + RTC_LOG(LS_WARNING) << "No PPS id in IDR nalu."; + return kRequestKeyframe; + } + + pps = pps_data_.find(nalu.pps_id); + if (pps == pps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No PPS with id " << nalu.pps_id << " received"; + return kRequestKeyframe; + } + + sps = sps_data_.find(pps->second.sps_id); + if (sps == sps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No SPS with id << " << pps->second.sps_id << " received"; + return kRequestKeyframe; + } + + vps = vps_data_.find(sps->second.vps_id); + if (vps == vps_data_.end()) { + RTC_LOG(LS_WARNING) + << "No VPS with id " << sps->second.vps_id << " received"; + return kRequestKeyframe; + } + + // Since the first packet of every keyframe should have its width and + // height set we set it here in the case of it being supplied out of + // band. + packet->video_header.width = sps->second.width; + packet->video_header.height = sps->second.height; + + // If the VPS/SPS/PPS was supplied out of band then we will have saved + // the actual bitstream in |data|. + // This branch is not verified. 
+ if (vps->second.data && sps->second.data && pps->second.data) { + RTC_DCHECK_GT(vps->second.size, 0); + RTC_DCHECK_GT(sps->second.size, 0); + RTC_DCHECK_GT(pps->second.size, 0); + append_vps_sps_pps = true; + } + } + break; + } + default: + break; + } + } + + RTC_CHECK(!append_vps_sps_pps || + (sps != sps_data_.end() && pps != pps_data_.end())); + + // Calculate how much space we need for the rest of the bitstream. + size_t required_size = 0; + + if (append_vps_sps_pps) { + required_size += vps->second.size + sizeof(start_code_h265); + required_size += sps->second.size + sizeof(start_code_h265); + required_size += pps->second.size + sizeof(start_code_h265); + } + + if (h265_header.packetization_type == kH265AP) { + const uint8_t* nalu_ptr = data + 1; + while (nalu_ptr < data + data_size) { + RTC_DCHECK(video_header.is_first_packet_in_frame); + required_size += sizeof(start_code_h265); + + // The first two bytes describe the length of a segment. + uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1]; + nalu_ptr += 2; + + required_size += segment_length; + nalu_ptr += segment_length; + } + } else { + if (video_header.is_first_packet_in_frame) + required_size += sizeof(start_code_h265); + required_size += data_size; + } + + // Then we copy to the new buffer. + uint8_t* buffer = new uint8_t[required_size]; + uint8_t* insert_at = buffer; + + if (append_vps_sps_pps) { + // Insert VPS. + memcpy(insert_at, start_code_h265, sizeof(start_code_h265)); + insert_at += sizeof(start_code_h265); + memcpy(insert_at, vps->second.data.get(), vps->second.size); + insert_at += vps->second.size; + + // Insert SPS. + memcpy(insert_at, start_code_h265, sizeof(start_code_h265)); + insert_at += sizeof(start_code_h265); + memcpy(insert_at, sps->second.data.get(), sps->second.size); + insert_at += sps->second.size; + + // Insert PPS. 
+    memcpy(insert_at, start_code_h265, sizeof(start_code_h265));
+    insert_at += sizeof(start_code_h265);
+    memcpy(insert_at, pps->second.data.get(), pps->second.size);
+    insert_at += pps->second.size;
+
+    // Update codec header to reflect the newly added VPS, SPS and PPS.
+    H265NaluInfo vps_info;
+    vps_info.type = H265::NaluType::kVps;
+    vps_info.vps_id = vps->first;
+    vps_info.sps_id = -1;
+    vps_info.pps_id = -1;
+    H265NaluInfo sps_info;
+    sps_info.type = H265::NaluType::kSps;
+    sps_info.vps_id = vps->first;
+    sps_info.sps_id = sps->first;
+    sps_info.pps_id = -1;
+    H265NaluInfo pps_info;
+    pps_info.type = H265::NaluType::kPps;
+    pps_info.vps_id = vps->first;
+    pps_info.sps_id = sps->first;
+    pps_info.pps_id = pps->first;
+    if (h265_header.nalus_length + 3 <= kMaxNalusPerPacket) {  // Three entries (VPS, SPS, PPS) are appended below.
+      h265_header.nalus[h265_header.nalus_length++] = vps_info;
+      h265_header.nalus[h265_header.nalus_length++] = sps_info;
+      h265_header.nalus[h265_header.nalus_length++] = pps_info;
+    } else {
+      RTC_LOG(LS_WARNING) << "Not enough space in H.265 codec header to insert "
+                             "VPS/SPS/PPS provided out-of-band.";
+    }
+  }
+
+  // Copy the rest of the bitstream and insert start codes.
+  if (h265_header.packetization_type == kH265AP) {
+    const uint8_t* nalu_ptr = data + 1;  // NOTE(review): session_info.cc skips a 2-byte H.265 payload header; confirm 1 is intended.
+    while (nalu_ptr < data + data_size) {
+      memcpy(insert_at, start_code_h265, sizeof(start_code_h265));
+      insert_at += sizeof(start_code_h265);
+
+      // The first two bytes describe the length of a segment.
+      uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1];
+      nalu_ptr += 2;
+
+      size_t copy_end = nalu_ptr - data + segment_length;
+      if (copy_end > data_size) {
+        delete[] buffer;
+        return kDrop;
+      }
+
+      memcpy(insert_at, nalu_ptr, segment_length);
+      insert_at += segment_length;
+      nalu_ptr += segment_length;
+    }
+  } else {
+    if (video_header.is_first_packet_in_frame) {
+      memcpy(insert_at, start_code_h265, sizeof(start_code_h265));
+      insert_at += sizeof(start_code_h265);
+    }
+    memcpy(insert_at, data, data_size);
+  }
+
+  packet->dataPtr = buffer;
+  packet->sizeBytes = required_size;
+  return kInsert;
+}
+
+void H265VpsSpsPpsTracker::InsertVpsSpsPpsNalus(
+    const std::vector& vps,
+    const std::vector& sps,
+    const std::vector& pps) {
+  constexpr size_t kNaluHeaderOffset = 1;  // NOTE(review): session_info.cc uses a 2-byte H.265 NAL header; confirm 1 is intended.
+  if (vps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "VPS size " << vps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  if ((vps[0] & 0x7e) >> 1 != H265::NaluType::kVps) {
+    RTC_LOG(LS_WARNING) << "VPS Nalu header missing";
+    return;
+  }
+  if (sps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "SPS size " << sps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  if ((sps[0] & 0x7e) >> 1 != H265::NaluType::kSps) {
+    RTC_LOG(LS_WARNING) << "SPS Nalu header missing";
+    return;
+  }
+  if (pps.size() < kNaluHeaderOffset) {
+    RTC_LOG(LS_WARNING) << "PPS size " << pps.size() << " is smaller than "
+                        << kNaluHeaderOffset;
+    return;
+  }
+  if ((pps[0] & 0x7e) >> 1 != H265::NaluType::kPps) {
+    RTC_LOG(LS_WARNING) << "PPS Nalu header missing";
+    return;
+  }
+  absl::optional parsed_vps = H265VpsParser::ParseVps(
+      vps.data() + kNaluHeaderOffset, vps.size() - kNaluHeaderOffset);
+  absl::optional parsed_sps = H265SpsParser::ParseSps(
+      sps.data() + kNaluHeaderOffset, sps.size() - kNaluHeaderOffset);
+  absl::optional parsed_pps = H265PpsParser::ParsePps(
+      pps.data() + kNaluHeaderOffset, pps.size() - kNaluHeaderOffset);
+
+  if (!parsed_vps) {
+
RTC_LOG(LS_WARNING) << "Failed to parse VPS."; + } + + if (!parsed_sps) { + RTC_LOG(LS_WARNING) << "Failed to parse SPS."; + } + + if (!parsed_pps) { + RTC_LOG(LS_WARNING) << "Failed to parse PPS."; + } + + if (!parsed_vps || !parsed_pps || !parsed_sps) { + return; + } + + VpsInfo vps_info; + vps_info.size = vps.size(); + uint8_t* vps_data = new uint8_t[vps_info.size]; + memcpy(vps_data, vps.data(), vps_info.size); + vps_info.data.reset(vps_data); + vps_data_[parsed_vps->id] = std::move(vps_info); + + SpsInfo sps_info; + sps_info.size = sps.size(); + sps_info.width = parsed_sps->width; + sps_info.height = parsed_sps->height; + sps_info.vps_id = parsed_sps->vps_id; + uint8_t* sps_data = new uint8_t[sps_info.size]; + memcpy(sps_data, sps.data(), sps_info.size); + sps_info.data.reset(sps_data); + sps_data_[parsed_sps->id] = std::move(sps_info); + + PpsInfo pps_info; + pps_info.size = pps.size(); + pps_info.sps_id = parsed_pps->sps_id; + uint8_t* pps_data = new uint8_t[pps_info.size]; + memcpy(pps_data, pps.data(), pps_info.size); + pps_info.data.reset(pps_data); + pps_data_[parsed_pps->id] = std::move(pps_info); + + RTC_LOG(LS_INFO) << "Inserted SPS id " << parsed_sps->id << " and PPS id " + << parsed_pps->id << " (referencing SPS " + << parsed_pps->sps_id << ")"; +} + +} // namespace video_coding +} // namespace webrtc diff --git a/modules/video_coding/h265_vps_sps_pps_tracker.h b/modules/video_coding/h265_vps_sps_pps_tracker.h new file mode 100644 index 00000000000..ce33f5ab894 --- /dev/null +++ b/modules/video_coding/h265_vps_sps_pps_tracker.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_ +#define MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_ + +#include +#include +#include +#include + +#include "modules/include/module_common_types.h" + +namespace webrtc { + +class VCMPacket; + +namespace video_coding { + +class H265VpsSpsPpsTracker { + public: + enum PacketAction { kInsert, kDrop, kRequestKeyframe }; + + PacketAction CopyAndFixBitstream(VCMPacket* packet); + + void InsertVpsSpsPpsNalus(const std::vector& vps, + const std::vector& sps, + const std::vector& pps); + + private: + struct VpsInfo { + size_t size = 0; + std::unique_ptr data; + }; + + struct PpsInfo { + int sps_id = -1; + size_t size = 0; + std::unique_ptr data; + }; + + struct SpsInfo { + int vps_id = -1; + size_t size = 0; + int width = -1; + int height = -1; + std::unique_ptr data; + }; + + std::map vps_data_; + std::map pps_data_; + std::map sps_data_; +}; + +} // namespace video_coding +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_H264_SPS_PPS_TRACKER_H_ diff --git a/modules/video_coding/jitter_buffer_common.h b/modules/video_coding/jitter_buffer_common.h index 1a7f08af3e0..a50a1df268f 100644 --- a/modules/video_coding/jitter_buffer_common.h +++ b/modules/video_coding/jitter_buffer_common.h @@ -54,6 +54,9 @@ enum VCMFrameBufferStateEnum { }; enum { kH264StartCodeLengthBytes = 4 }; +#ifndef DISABLE_H265 +enum { kH265StartCodeLengthBytes = 4 }; +#endif } // namespace webrtc #endif // MODULES_VIDEO_CODING_JITTER_BUFFER_COMMON_H_ diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc index bd1ab03880f..22d9de96fad 100644 --- a/modules/video_coding/packet_buffer.cc +++ b/modules/video_coding/packet_buffer.cc @@ -18,8 +18,14 @@ #include "absl/types/variant.h" #include "api/video/encoded_frame.h" #include "common_video/h264/h264_common.h" +#ifndef DISABLE_H265 +#include "common_video/h265/h265_common.h" +#endif #include "modules/rtp_rtcp/source/rtp_video_header.h" #include 
"modules/video_coding/codecs/h264/include/h264_globals.h" +#ifndef DISABLE_H265 +#include "modules/video_coding/codecs/h265/include/h265_globals.h" +#endif #include "modules/video_coding/frame_object.h" #include "rtc_base/atomic_ops.h" #include "rtc_base/checks.h" @@ -301,7 +307,13 @@ std::vector> PacketBuffer::FindFrames( bool has_h264_pps = false; bool has_h264_idr = false; bool is_h264_keyframe = false; - +#ifndef DISABLE_H265 + bool is_h265 = data_buffer_[start_index].codec() == kVideoCodecH265; + bool has_h265_sps = false; + bool has_h265_pps = false; + bool has_h265_idr = false; + bool is_h265_keyframe = false; +#endif while (true) { ++tested_packets; frame_size += data_buffer_[start_index].sizeBytes; @@ -338,7 +350,26 @@ std::vector> PacketBuffer::FindFrames( is_h264_keyframe = true; } } - +#ifndef DISABLE_H265 + if (is_h265 && !is_h265_keyframe) { + const auto* h265_header = absl::get_if( + &data_buffer_[start_index].video_header.video_type_header); + if (!h265_header || h265_header->nalus_length >= kMaxNalusPerPacket) + return found_frames; + for (size_t j = 0; j < h265_header->nalus_length; ++j) { + if (h265_header->nalus[j].type == H265::NaluType::kSps) { + has_h265_sps = true; + } else if (h265_header->nalus[j].type == H265::NaluType::kPps) { + has_h265_pps = true; + } else if (h265_header->nalus[j].type == H265::NaluType::kIdr) { + has_h265_idr = true; + } + } + if ((has_h265_sps && has_h265_pps) || has_h265_idr) { + is_h265_keyframe = true; + } + } +#endif if (tested_packets == size_) break; diff --git a/modules/video_coding/session_info.cc b/modules/video_coding/session_info.cc index 3d4af62d34a..d697119b0c4 100644 --- a/modules/video_coding/session_info.cc +++ b/modules/video_coding/session_info.cc @@ -152,7 +152,21 @@ std::vector VCMSessionInfo::GetNaluInfos() const { } return nalu_infos; } - +#ifndef DISABLE_H265 +std::vector VCMSessionInfo::GetH265NaluInfos() const { + if (packets_.empty() || packets_.front().video_header.codec != 
kVideoCodecH265) + return std::vector(); + std::vector nalu_infos; + for (const VCMPacket& packet : packets_) { + const auto& h265 = + absl::get(packet.video_header.video_type_header); + for (size_t i = 0; i < h265.nalus_length; ++i) { + nalu_infos.push_back(h265.nalus[i]); + } + } + return nalu_infos; +} +#endif void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) { if (packets_.empty()) return; @@ -211,6 +225,11 @@ size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer, // TODO(pbos): Remove H264 parsing from this step and use a fragmentation // header supplied by the H264 depacketizer. const size_t kH264NALHeaderLengthInBytes = 1; +#ifndef DISABLE_H265 + const size_t kH265NALHeaderLengthInBytes = 2; + const auto* h265 = + absl::get_if(&packet.video_header.video_type_header); +#endif const size_t kLengthFieldLength = 2; const auto* h264 = absl::get_if(&packet.video_header.video_type_header); @@ -236,6 +255,36 @@ size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer, packet.sizeBytes = required_length; return packet.sizeBytes; } +#ifndef DISABLE_H265 + else if (h265 && h265->packetization_type == kH265AP) { + // Similar to H264, for H265 aggregation packets, we rely on jitter buffer + // to remove the two length bytes between each NAL unit, and potentially add + // start codes. + size_t required_length = 0; + const uint8_t* nalu_ptr = + packet_buffer + kH265NALHeaderLengthInBytes; // skip payloadhdr + while (nalu_ptr < packet_buffer + packet.sizeBytes) { + size_t length = BufferToUWord16(nalu_ptr); + required_length += + length + (packet.insertStartCode ? 
kH265StartCodeLengthBytes : 0); + nalu_ptr += kLengthFieldLength + length; + } + ShiftSubsequentPackets(packet_it, required_length); + nalu_ptr = packet_buffer + kH265NALHeaderLengthInBytes; + uint8_t* frame_buffer_ptr = frame_buffer + offset; + while (nalu_ptr < packet_buffer + packet.sizeBytes) { + size_t length = BufferToUWord16(nalu_ptr); + nalu_ptr += kLengthFieldLength; + // since H265 shares the same start code as H264, use the same Insert + // function to handle start code. + frame_buffer_ptr += Insert(nalu_ptr, length, packet.insertStartCode, + const_cast(frame_buffer_ptr)); + nalu_ptr += length; + } + packet.sizeBytes = required_length; + return packet.sizeBytes; + } +#endif ShiftSubsequentPackets( packet_it, packet.sizeBytes + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0)); @@ -462,7 +511,23 @@ int VCMSessionInfo::InsertPacket(const VCMPacket& packet, IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) { last_packet_seq_num_ = packet.seqNum; } - } else { + } +#ifndef DISABLE_H265 + else if (packet.codec() == kVideoCodecH265) { + frame_type_ = packet.video_header.frame_type; + if (packet.is_first_packet_in_frame() && + (first_packet_seq_num_ == -1 || + IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) { + first_packet_seq_num_ = packet.seqNum; + } + if (packet.markerBit && + (last_packet_seq_num_ == -1 || + IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) { + last_packet_seq_num_ = packet.seqNum; + } + } +#endif + else { // Only insert media packets between first and last packets (when // available). // Placing check here, as to properly account for duplicate packets. 
diff --git a/modules/video_coding/session_info.h b/modules/video_coding/session_info.h index 02c423661e1..9ac8734875a 100644 --- a/modules/video_coding/session_info.h +++ b/modules/video_coding/session_info.h @@ -64,7 +64,9 @@ class VCMSessionInfo { int Tl0PicId() const; std::vector GetNaluInfos() const; - +#ifndef DISABLE_H265 + std::vector GetH265NaluInfos() const; +#endif void SetGofInfo(const GofInfoVP9& gof_info, size_t idx); private: diff --git a/test/video_codec_settings.h b/test/video_codec_settings.h index 1557fc9a0e9..39c7b75b78c 100644 --- a/test/video_codec_settings.h +++ b/test/video_codec_settings.h @@ -44,7 +44,8 @@ static void CodecSettings(VideoCodecType codec_type, VideoCodec* settings) { settings->numberOfSimulcastStreams = 0; settings->timing_frame_thresholds = { - kTestTimingFramesDelayMs, kTestOutlierFrameSizePercent, + kTestTimingFramesDelayMs, + kTestOutlierFrameSizePercent, }; settings->codecType = codec_type; @@ -55,6 +56,11 @@ static void CodecSettings(VideoCodecType codec_type, VideoCodec* settings) { case kVideoCodecVP9: *(settings->VP9()) = VideoEncoder::GetDefaultVp9Settings(); return; +#ifndef DISABLE_H265 + case kVideoCodecH265: + *(settings->H265()) = VideoEncoder::GetDefaultH265Settings(); + return; +#endif case kVideoCodecH264: // TODO(brandtr): Set |qpMax| here, when the OpenH264 wrapper supports it. 
*(settings->H264()) = VideoEncoder::GetDefaultH264Settings(); diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc index 4e513851396..8bb2c63f409 100644 --- a/video/rtp_video_stream_receiver.cc +++ b/video/rtp_video_stream_receiver.cc @@ -290,8 +290,21 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData( case video_coding::H264SpsPpsTracker::kInsert: break; } - - } else { + } +#ifndef DISABLE_H265 + else if (packet.codec() == kVideoCodecH265) { + switch (h265_tracker_.CopyAndFixBitstream(&packet)) { + case video_coding::H265VpsSpsPpsTracker::kRequestKeyframe: + keyframe_request_sender_->RequestKeyFrame(); + RTC_FALLTHROUGH(); + case video_coding::H265VpsSpsPpsTracker::kDrop: + return 0; + case video_coding::H265VpsSpsPpsTracker::kInsert: + break; + } + } +#endif + else { uint8_t* data = new uint8_t[packet.sizeBytes]; memcpy(data, packet.dataPtr, packet.sizeBytes); packet.dataPtr = data; diff --git a/video/rtp_video_stream_receiver.h b/video/rtp_video_stream_receiver.h index 6a63ad50fc0..7576e97ca69 100644 --- a/video/rtp_video_stream_receiver.h +++ b/video/rtp_video_stream_receiver.h @@ -33,6 +33,9 @@ #include "modules/rtp_rtcp/source/contributing_sources.h" #include "modules/video_coding/h264_sps_pps_tracker.h" #include "modules/video_coding/loss_notification_controller.h" +#ifndef DISABLE_H265 +#include "modules/video_coding/h265_vps_sps_pps_tracker.h" +#endif #include "modules/video_coding/packet_buffer.h" #include "modules/video_coding/rtp_frame_reference_finder.h" #include "rtc_base/constructor_magic.h" @@ -221,6 +224,11 @@ class RtpVideoStreamReceiver : public LossNotificationSender, // Maps payload type to codec type, for packetization. std::map> payload_type_map_; + +#ifndef DISABLE_H265 + video_coding::H265VpsSpsPpsTracker h265_tracker_; +#endif + // TODO(johan): Remove pt_codec_params_ once // https://bugs.chromium.org/p/webrtc/issues/detail?id=6883 is resolved. 
// Maps a payload type to a map of out-of-band supplied codec parameters.
diff --git a/video/send_statistics_proxy.cc b/video/send_statistics_proxy.cc
index cf417f5c3a7..4d0e41d7b2f 100644
--- a/video/send_statistics_proxy.cc
+++ b/video/send_statistics_proxy.cc
@@ -43,6 +43,9 @@ enum HistogramCodecType {
   kVideoVp8 = 1,
   kVideoVp9 = 2,
   kVideoH264 = 3,
+#ifndef DISABLE_H265
+  kVideoH265 = 4,
+#endif
   kVideoMax = 64,
 };
 
@@ -70,6 +73,10 @@ HistogramCodecType PayloadNameToHistogramCodecType(
       return kVideoVp9;
     case kVideoCodecH264:
       return kVideoH264;
+#ifndef DISABLE_H265
+    case kVideoCodecH265:
+      return kVideoH265;
+#endif
     default:
       return kVideoUnknown;
   }
diff --git a/video/video_receive_stream.cc b/video/video_receive_stream.cc
index 99b83c9855e..cb79829bd69 100644
--- a/video/video_receive_stream.cc
+++ b/video/video_receive_stream.cc
@@ -86,7 +86,11 @@ VideoCodec CreateDecoderVideoCodec(const VideoReceiveStream::Decoder& decoder) {
     associated_codec.codecType = kVideoCodecMultiplex;
     return associated_codec;
   }
-
+#ifndef DISABLE_H265
+  else if (codec.codecType == kVideoCodecH265) {
+    *(codec.H265()) = VideoEncoder::GetDefaultH265Settings();
+  }
+#endif
   codec.width = 320;
   codec.height = 180;
   const int kDefaultStartBitrate = 300;
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 013ad8f8535..a637b4ebe6d 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -131,7 +131,13 @@ bool RequiresEncoderReset(const VideoCodec& previous_send_codec,
         return true;
       }
       break;
-
+#ifndef DISABLE_H265
+    case kVideoCodecH265:
+      if (new_send_codec.H265() != previous_send_codec.H265()) {
+        return true;
+      }
+      break;
+#endif
     default:
       break;
   }