From 243912b03f7d089b1cdda31197baf05a2e37cb70 Mon Sep 17 00:00:00 2001 From: Jianjun Zhu Date: Fri, 25 May 2018 09:57:29 +0800 Subject: [PATCH] Enable H.265 for iOS. This issue also fixes some H.265 depacketizer issues for all platforms. There are still some issues expected to be fixed later. - H.265 only supported on iOS 11, iPhone 7 and later. Device and system check should be performed before calling H.265 APIs. - Profile, tier and level are not correctly handled. - QP is not enabled. This change has been modified during rebasing since some C++ files already been picked to 70-sdk branch. Change-Id: Idf6087bae4f12432178b571ab76ab55127b66f5f Reviewed-on: https://git-ccr-1.devtools.intel.com/gerrit/78608 Reviewed-by: Qiu, Jianlin Tested-by: Qiu, Jianlin --- modules/rtp_rtcp/source/h265_sps_parser.cc | 189 ------ modules/rtp_rtcp/source/h265_sps_parser.h | 31 - sdk/BUILD.gn | 6 + .../PeerConnection/RTCVideoCodec+Private.h | 1 + .../PeerConnection/RTCVideoCodecH265.mm | 68 ++ .../VideoToolbox/RTCVideoDecoderH265.mm | 291 +++++++++ .../VideoToolbox/RTCVideoEncoderH265.mm | 615 ++++++++++++++++++ .../Classes/VideoToolbox/nalu_rewriter.cc | 274 ++++++++ .../Classes/VideoToolbox/nalu_rewriter.h | 26 + .../Headers/WebRTC/RTCVideoCodecH265.h | 43 ++ test/video_codec_settings.h | 4 + 11 files changed, 1328 insertions(+), 220 deletions(-) delete mode 100644 modules/rtp_rtcp/source/h265_sps_parser.cc delete mode 100644 modules/rtp_rtcp/source/h265_sps_parser.h create mode 100644 sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm create mode 100644 sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH265.mm create mode 100644 sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH265.mm create mode 100644 sdk/objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h diff --git a/modules/rtp_rtcp/source/h265_sps_parser.cc b/modules/rtp_rtcp/source/h265_sps_parser.cc deleted file mode 100644 index 6e174f6304a..00000000000 --- a/modules/rtp_rtcp/source/h265_sps_parser.cc 
+++ /dev/null @@ -1,189 +0,0 @@ -/* - * Intel License - */ - -#include "webrtc/modules/rtp_rtcp/source/h265_sps_parser.h" - -#include "webrtc/base/bitbuffer.h" -#include "webrtc/base/bytebuffer.h" -#include "webrtc/base/logging.h" - -#include - -#define RETURN_FALSE_ON_FAIL(x) \ - if (!(x)) { \ - return false; \ - } - -namespace webrtc { - -H265SpsParser::H265SpsParser(const uint8_t* sps, size_t byte_length) - : sps_(sps), byte_length_(byte_length), width_(), height_() { -} - -bool H265SpsParser::Parse() { - // General note: this is based off the 04/2015 version of the H.265 standard. - // You can find it on this page: - // http://www.itu.int/rec/T-REC-H.265 - - const char* sps_bytes = reinterpret_cast(sps_); - // First, parse out rbsp, which is basically the source buffer minus emulation - // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in - // section 7.3.1.1 of the H.265 standard, similar to H264. - rtc::ByteBufferWriter rbsp_buffer; - for (size_t i = 0; i < byte_length_;) { - // Be careful about over/underflow here. byte_length_ - 3 can underflow, and - // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ - // above, and that expression will produce the number of bytes left in - // the stream including the byte at i. - if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 && - sps_[i + 2] == 3) { - // Two rbsp bytes + the emulation byte. - rbsp_buffer.WriteBytes(sps_bytes + i, 2); - i += 3; - } else { - // Single rbsp byte. - rbsp_buffer.WriteBytes(sps_bytes + i, 1); - i++; - } - } - - // Now, we need to use a bit buffer to parse through the actual HEVC SPS - // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the - // H.265 standard for a complete description. - // Since we only care about resolution, we ignore the majority of fields, but - // we still have to actively parse through a lot of the data, since many of - // the fields have variable size. 
- // Unlike H264, for H265, the picture size is indicated by pic_width_in_luma_samples - // and pic_height_in_luma_samples, if conformance_window_flag !=1; - // When conformance_window_flag is 1, the width is adjusted with con_win_xx_offset - // - rtc::BitBuffer parser(reinterpret_cast(rbsp_buffer.Data()), - rbsp_buffer.Length()); - - // The golomb values we have to read, not just consume. - uint32_t golomb_ignored; - - // separate_colour_plane_flag is optional (assumed 0), but has implications - // about the ChromaArrayType, which modifies how we treat crop coordinates. - uint32_t separate_colour_plane_flag = 0; - // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is - // 0. It defaults to 1, when not specified. - uint32_t chroma_format_idc = 1; - - - // sps_video_parameter_set_id: u(4) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); - // sps_max_sub_layers_minus1: u(3) - uint32_t sps_max_sub_layers_minus1 = 0; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&sps_max_sub_layers_minus1, 3)); - // sps_temporal_id_nesting_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); - // profile_tier_level(1, sps_max_sub_layers_minus1). We are acutally not - // using them, so read/skip over it. - // general_profile_space+general_tier_flag+general_prfile_idc: u(8) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); - // general_profile_compatabilitiy_flag[32] - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(4)); - // general_progressive_source_flag + interlaced_source_flag+ non-packed_constraint - // flag + frame_only_constraint_flag: u(4) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); - // general_profile_idc decided flags or reserved. 
u(43) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(43)); - // general_inbld_flag or reserved 0: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); - // general_level_idc: u(8) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); - // if max_sub_layers_minus1 >=1, read the sublayer profile information - std::vector sub_layer_profile_present_flags; - std::vector sub_layer_level_present_flags; - uint32_t sub_layer_profile_present = 0; - uint32_t sub_layer_level_present = 0; - for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) { - //sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&sub_layer_profile_present, 1)); - RETURN_FALSE_ON_FAIL(parser.ReadBits(&sub_layer_level_present, 1)); - sub_layer_profile_present_flags.push_back(sub_layer_profile_present); - sub_layer_level_present_flags.push_back(sub_layer_level_present); - } - if (sps_max_sub_layers_minus1 > 0) { - for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) { - // reserved 2 bits: u(2) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(2)); - } - } - for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) { - if(sub_layer_profile_present_flags[k]) {// - // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); - // profile_compatability_flag: u(32) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(4)); - // sub_layer progressive_source_flag/interlaced_source_flag/ - // non_packed_constraint_flag/frame_only_constraint_flag: u(4) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(4)); - // following 43-bits are profile_idc specific. We simply read/skip it. u(43) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(43)); - // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); - } - if (sub_layer_level_present_flags[k]) { - // sub_layer_level_idc: u(8) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); - } - } - //sps_seq_parameter_set_id: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // chrome_format_idc: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc)); - if (chroma_format_idc == 3) { - // seperate_colour_plane_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1)); - } - uint32_t pic_width_in_luma_samples = 0; - uint32_t pic_height_in_luma_samples = 0; - // pic_width_in_luma_samples: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_luma_samples)); - // pic_height_in_luma_samples: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_height_in_luma_samples)); - // conformance_window_flag: u(1) - uint32_t conformance_window_flag = 0; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&conformance_window_flag, 1)); - - uint32_t conf_win_left_offset = 0; - uint32_t conf_win_right_offset = 0; - uint32_t conf_win_top_offset = 0; - uint32_t conf_win_bottom_offset = 0; - if (conformance_window_flag) { - // conf_win_left_offset: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_left_offset)); - // conf_win_right_offset: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_right_offset)); - // conf_win_top_offset: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_top_offset)); - // conf_win_bottom_offset: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&conf_win_bottom_offset)); - } - - //For enough to get the resolution information. 
calcaluate according to HEVC spec 7.4.3.2 - int width = 0; - int height = 0; - - width = pic_width_in_luma_samples; - height = pic_height_in_luma_samples; - - if (conformance_window_flag) { - int sub_width_c = ((1 == chroma_format_idc) || (2 == chroma_format_idc)) && - (0 == separate_colour_plane_flag) ? 2 : 1; - int sub_height_c = (1 == chroma_format_idc) && (0 == separate_colour_plane_flag) ? 2 : 1; - //the offset includes the pixel within conformance window. so don't need to +1 as per spec - width -= sub_width_c*(conf_win_right_offset + conf_win_left_offset); - height -= sub_height_c*(conf_win_top_offset + conf_win_bottom_offset); - } - - width_ = width; - height_ = height; - return true; - -} - -} // namespace webrtc diff --git a/modules/rtp_rtcp/source/h265_sps_parser.h b/modules/rtp_rtcp/source/h265_sps_parser.h deleted file mode 100644 index 6b08b0959f5..00000000000 --- a/modules/rtp_rtcp/source/h265_sps_parser.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Intel License - */ - -#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ -#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ - -#include "webrtc/base/common.h" - -namespace webrtc { - -// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. -// Currently, only resolution is read without being ignored. -class H265SpsParser { - public: - H265SpsParser(const uint8_t* sps, size_t byte_length); - // Parses the SPS to completion. Returns true if the SPS was parsed correctly. 
- bool Parse(); - uint16_t width() { return width_; } - uint16_t height() { return height_; } - - private: - const uint8_t* const sps_; - const size_t byte_length_; - - uint16_t width_; - uint16_t height_; -}; - -} // namespace webrtc -#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H265_SPS_PARSER_H_ diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index 48a349f8e8a..f9c28d7106b 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -445,10 +445,12 @@ if (is_ios || is_mac) { "objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h", "objc/Framework/Classes/PeerConnection/RTCVideoCodec.mm", "objc/Framework/Classes/PeerConnection/RTCVideoCodecH264.mm", + "objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm", "objc/Framework/Classes/PeerConnection/RTCVideoEncoderSettings.mm", "objc/Framework/Headers/WebRTC/RTCVideoCodec.h", "objc/Framework/Headers/WebRTC/RTCVideoCodecFactory.h", "objc/Framework/Headers/WebRTC/RTCVideoCodecH264.h", + "objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h", ] if (is_ios) { sources += [ @@ -982,6 +984,7 @@ if (is_ios || is_mac) { "objc/Framework/Headers/WebRTC/RTCTracing.h", "objc/Framework/Headers/WebRTC/RTCVideoCapturer.h", "objc/Framework/Headers/WebRTC/RTCVideoCodecH264.h", + "objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h", "objc/Framework/Headers/WebRTC/RTCVideoFrame.h", "objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h", "objc/Framework/Headers/WebRTC/RTCVideoRenderer.h", @@ -1179,7 +1182,10 @@ if (is_ios || is_mac) { ] sources = [ "objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH264.mm", + "objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH265.mm", "objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm", + "objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH265.mm", + "objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h", ] configs += [ diff --git a/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h b/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h index 4a2cf5228bb..e5547c0f6ff 100644 --- 
a/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h +++ b/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h @@ -11,6 +11,7 @@ #import "WebRTC/RTCVideoCodec.h" #import "WebRTC/RTCVideoCodecH264.h" +#import "WebRTC/RTCVideoCodecH265.h" #include "api/video_codecs/sdp_video_format.h" #include "common_video/include/video_frame.h" diff --git a/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm b/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm new file mode 100644 index 00000000000..8d045c2affe --- /dev/null +++ b/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm @@ -0,0 +1,68 @@ +/* + * Copyright 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* This file is borrowed from + * webrtc/sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodecH265.mm */ + +#import "WebRTC/RTCVideoCodecH265.h" + +#include + +#import "RTCVideoCodec+Private.h" +#import "WebRTC/RTCVideoCodec.h" + +#include "rtc_base/timeutils.h" +#include "system_wrappers/include/field_trial.h" + +static NSString* kH265CodecName = @"H265"; +// TODO(jianjunz): This is value is not correct. +static NSString* kLevel31Main = @"4d001f"; + +@implementation RTCCodecSpecificInfoH265 +@end + +// Encoder factory. 
+@implementation RTCVideoEncoderFactoryH265 + +- (NSArray*)supportedCodecs { + NSMutableArray* codecs = [NSMutableArray array]; + NSString* codecName = kH265CodecName; + + NSDictionary* mainParams = @{ + @"profile-level-id" : kLevel31Main, + @"level-asymmetry-allowed" : @"1", + @"packetization-mode" : @"1", + }; + RTCVideoCodecInfo* constrainedBaselineInfo = + [[RTCVideoCodecInfo alloc] initWithName:codecName parameters:mainParams]; + [codecs addObject:constrainedBaselineInfo]; + + return [codecs copy]; +} + +- (id)createEncoder:(RTCVideoCodecInfo*)info { + return [[RTCVideoEncoderH265 alloc] initWithCodecInfo:info]; +} + +@end + +// Decoder factory. +@implementation RTCVideoDecoderFactoryH265 + +- (id)createDecoder:(RTCVideoCodecInfo*)info { + return [[RTCVideoDecoderH265 alloc] init]; +} + +- (NSArray*)supportedCodecs { + NSString* codecName = kH265CodecName; + return @[ [[RTCVideoCodecInfo alloc] initWithName:codecName parameters:nil] ]; +} + +@end diff --git a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH265.mm b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH265.mm new file mode 100644 index 00000000000..2a374e3e422 --- /dev/null +++ b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH265.mm @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#import "WebRTC/RTCVideoCodecH265.h" + +#import + +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/timeutils.h" +#include "sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h" + +#import "WebRTC/RTCVideoFrame.h" +#import "WebRTC/RTCVideoFrameBuffer.h" +#import "helpers.h" +#import "scoped_cftyperef.h" + +#if defined(WEBRTC_IOS) +#import "WebRTC/UIDevice+RTCDevice.h" +#import "sdk/objc/Framework/Classes/Common/RTCUIApplicationStatusObserver.h" +#endif + +// Struct that we pass to the decoder per frame to decode. We receive it again +// in the decoder callback. +struct RTCH265FrameDecodeParams { + RTCH265FrameDecodeParams(RTCVideoDecoderCallback cb, int64_t ts) + : callback(cb), timestamp(ts) {} + RTCVideoDecoderCallback callback; + int64_t timestamp; +}; + +// This is the callback function that VideoToolbox calls when decode is +// complete. +void h265DecompressionOutputCallback(void* decoder, + void* params, + OSStatus status, + VTDecodeInfoFlags infoFlags, + CVImageBufferRef imageBuffer, + CMTime timestamp, + CMTime duration) { + std::unique_ptr decodeParams( + reinterpret_cast(params)); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to decode frame. Status: " << status; + return; + } + // TODO(tkchin): Handle CVO properly. + RTCCVPixelBuffer* frameBuffer = + [[RTCCVPixelBuffer alloc] initWithPixelBuffer:imageBuffer]; + RTCVideoFrame* decodedFrame = [[RTCVideoFrame alloc] + initWithBuffer:frameBuffer + rotation:RTCVideoRotation_0 + timeStampNs:CMTimeGetSeconds(timestamp) * rtc::kNumNanosecsPerSec]; + decodedFrame.timeStamp = decodeParams->timestamp; + decodeParams->callback(decodedFrame); +} + +// Decoder. 
+@implementation RTCVideoDecoderH265 { + CMVideoFormatDescriptionRef _videoFormat; + VTDecompressionSessionRef _decompressionSession; + RTCVideoDecoderCallback _callback; + OSStatus _error; +} + +- (instancetype)init { + if (self = [super init]) { +#if defined(WEBRTC_IOS) + [RTCUIApplicationStatusObserver prepareForUse]; +#endif + } + + return self; +} + +- (void)dealloc { + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; +} + +- (NSInteger)startDecodeWithSettings:(RTCVideoEncoderSettings*)settings + numberOfCores:(int)numberOfCores { + return WEBRTC_VIDEO_CODEC_OK; +} + +- (NSInteger)decode:(RTCEncodedImage*)inputImage + missingFrames:(BOOL)missingFrames + codecSpecificInfo:(__nullable id)info + renderTimeMs:(int64_t)renderTimeMs { + RTC_DCHECK(inputImage.buffer); + + if (_error != noErr) { + RTC_LOG(LS_WARNING) << "Last frame decode failed."; + _error = noErr; + return WEBRTC_VIDEO_CODEC_ERROR; + } + +#if defined(WEBRTC_IOS) + if (![[RTCUIApplicationStatusObserver sharedInstance] isApplicationActive]) { + // Ignore all decode requests when app isn't active. In this state, the + // hardware decoder has been invalidated by the OS. + // Reset video format so that we won't process frames until the next + // keyframe. + [self setVideoFormat:nullptr]; + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } +#endif + rtc::ScopedCFTypeRef inputFormat = + rtc::ScopedCF(webrtc::CreateH265VideoFormatDescription( + (uint8_t*)inputImage.buffer.bytes, inputImage.buffer.length)); + if (inputFormat) { + CMVideoDimensions dimensions = + CMVideoFormatDescriptionGetDimensions(inputFormat.get()); + RTC_LOG(LS_INFO) << "Resolution: " << dimensions.width << " x " + << dimensions.height; + // Check if the video format has changed, and reinitialize decoder if + // needed. 
+ if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) { + [self setVideoFormat:inputFormat.get()]; + int resetDecompressionSessionError = [self resetDecompressionSession]; + if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) { + return resetDecompressionSessionError; + } + } + } + if (!_videoFormat) { + // We received a frame but we don't have format information so we can't + // decode it. + // This can happen after backgrounding. We need to wait for the next + // sps/pps before we can resume so we request a keyframe by returning an + // error. + RTC_LOG(LS_WARNING) << "Missing video format. Frame with sps/pps required."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + CMSampleBufferRef sampleBuffer = nullptr; + if (!webrtc::H265AnnexBBufferToCMSampleBuffer( + (uint8_t*)inputImage.buffer.bytes, inputImage.buffer.length, + _videoFormat, &sampleBuffer)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(sampleBuffer); + VTDecodeFrameFlags decodeFlags = + kVTDecodeFrame_EnableAsynchronousDecompression; + std::unique_ptr frameDecodeParams; + frameDecodeParams.reset( + new RTCH265FrameDecodeParams(_callback, inputImage.timeStamp)); + OSStatus status = VTDecompressionSessionDecodeFrame( + _decompressionSession, sampleBuffer, decodeFlags, + frameDecodeParams.release(), nullptr); +#if defined(WEBRTC_IOS) + // Re-initialize the decoder if we have an invalid session while the app is + // active and retry the decode request. 
+ if (status == kVTInvalidSessionErr && + [self resetDecompressionSession] == WEBRTC_VIDEO_CODEC_OK) { + frameDecodeParams.reset( + new RTCH265FrameDecodeParams(_callback, inputImage.timeStamp)); + status = VTDecompressionSessionDecodeFrame( + _decompressionSession, sampleBuffer, decodeFlags, + frameDecodeParams.release(), nullptr); + } +#endif + CFRelease(sampleBuffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to decode frame with code: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)setCallback:(RTCVideoDecoderCallback)callback { + _callback = callback; +} + +- (NSInteger)releaseDecoder { + // Need to invalidate the session so that callbacks no longer occur and it + // is safe to null out the callback. + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +#pragma mark - Private + +- (int)resetDecompressionSession { + [self destroyDecompressionSession]; + + // Need to wait for the first SPS to initialize decoder. + if (!_videoFormat) { + return WEBRTC_VIDEO_CODEC_OK; + } + + // Set keys for OpenGL and IOSurface compatibilty, which makes the encoder + // create pixel buffers with GPU backed memory. The intent here is to pass + // the pixel buffers directly so we avoid a texture upload later during + // rendering. This currently is moot because we are converting back to an + // I420 frame after decode, but eventually we will be able to plumb + // CVPixelBuffers directly to the renderer. + // TODO(tkchin): Maybe only set OpenGL/IOSurface keys if we know that that + // we can pass CVPixelBuffers as native handles in decoder output. 
+ static size_t const attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#elif defined(WEBRTC_MAC) + kCVPixelBufferOpenGLCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, + kCVPixelBufferPixelFormatTypeKey + }; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = + CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, + pixelFormat}; + CFDictionaryRef attributes = + CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + VTDecompressionOutputCallbackRecord record = { + h265DecompressionOutputCallback, + nullptr, + }; + OSStatus status = + VTDecompressionSessionCreate(nullptr, _videoFormat, nullptr, attributes, + &record, &_decompressionSession); + CFRelease(attributes); + if (status != noErr) { + [self destroyDecompressionSession]; + return WEBRTC_VIDEO_CODEC_ERROR; + } + [self configureDecompressionSession]; + + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureDecompressionSession { + RTC_DCHECK(_decompressionSession); +#if defined(WEBRTC_IOS) + // VTSessionSetProperty(_decompressionSession, + // kVTDecompressionPropertyKey_RealTime, kCFBooleanTrue); +#endif +} + +- (void)destroyDecompressionSession { + if (_decompressionSession) { +#if defined(WEBRTC_IOS) + if ([UIDevice isIOS11OrLater]) { + VTDecompressionSessionWaitForAsynchronousFrames(_decompressionSession); + } +#endif + VTDecompressionSessionInvalidate(_decompressionSession); + CFRelease(_decompressionSession); + _decompressionSession = nullptr; + } +} + +- (void)setVideoFormat:(CMVideoFormatDescriptionRef)videoFormat { + if (_videoFormat == videoFormat) { + return; 
+ } + if (_videoFormat) { + CFRelease(_videoFormat); + } + _videoFormat = videoFormat; + if (_videoFormat) { + CFRetain(_videoFormat); + } +} + +- (NSString*)implementationName { + return @"VideoToolbox"; +} + +@end diff --git a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH265.mm b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH265.mm new file mode 100644 index 00000000000..f726f30ed49 --- /dev/null +++ b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH265.mm @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "WebRTC/RTCVideoCodecH265.h" + +#import +#include + +#if defined(WEBRTC_IOS) +#import "WebRTC/UIDevice+RTCDevice.h" +#import "sdk/objc/Framework/Classes/Common/RTCUIApplicationStatusObserver.h" +#endif +#import "WebRTC/RTCVideoCodec.h" +#import "WebRTC/RTCVideoFrame.h" +#import "WebRTC/RTCVideoFrameBuffer.h" +#include "common_video/h264/profile_level_id.h" +#include "common_video/include/bitrate_adjuster.h" +#include "libyuv/convert_from.h" +#include "modules/include/module_common_types.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "rtc_base/timeutils.h" +#import "sdk/objc/Framework/Classes/PeerConnection/RTCVideoCodec+Private.h" +#import "sdk/objc/Framework/Classes/VideoToolbox/helpers.h" +#include "sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h" +#include "system_wrappers/include/clock.h" + +@interface RTCVideoEncoderH265 () + +- (void)frameWasEncoded:(OSStatus)status + flags:(VTEncodeInfoFlags)infoFlags + 
sampleBuffer:(CMSampleBufferRef)sampleBuffer + width:(int32_t)width + height:(int32_t)height + renderTimeMs:(int64_t)renderTimeMs + timestamp:(uint32_t)timestamp + rotation:(RTCVideoRotation)rotation; + +@end + +namespace { // anonymous namespace + +// The ratio between kVTCompressionPropertyKey_DataRateLimits and +// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher +// than the average bit rate to avoid undershooting the target. +const float kLimitToAverageBitRateFactor = 1.5f; +// These thresholds deviate from the default h265 QP thresholds, as they +// have been found to work better on devices that support VideoToolbox +const int kLowh265QpThreshold = 28; +const int kHighh265QpThreshold = 39; + +// Struct that we pass to the encoder per frame to encode. We receive it again +// in the encoder callback. +struct RTCFrameEncodeParams { + RTCFrameEncodeParams(RTCVideoEncoderH265* e, + int32_t w, + int32_t h, + int64_t rtms, + uint32_t ts, + RTCVideoRotation r) + : encoder(e), + width(w), + height(h), + render_time_ms(rtms), + timestamp(ts), + rotation(r) {} + + RTCVideoEncoderH265* encoder; + int32_t width; + int32_t height; + int64_t render_time_ms; + uint32_t timestamp; + RTCVideoRotation rotation; +}; + +// We receive I420Frames as input, but we need to feed CVPixelBuffers into the +// encoder. This performs the copy and format conversion. +// TODO(tkchin): See if encoder will accept i420 frames and compare performance. 
+bool CopyVideoFrameToPixelBuffer(id frameBuffer, + CVPixelBufferRef pixelBuffer) { + RTC_DCHECK(pixelBuffer); + RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), + kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); + RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), + frameBuffer.height); + RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), + frameBuffer.width); + + CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0); + if (cvRet != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet; + return false; + } + + uint8_t* dstY = reinterpret_cast( + CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0)); + int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0); + uint8_t* dstUV = reinterpret_cast( + CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1)); + int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1); + // Convert I420 to NV12. + int ret = libyuv::I420ToNV12( + frameBuffer.dataY, frameBuffer.strideY, frameBuffer.dataU, + frameBuffer.strideU, frameBuffer.dataV, frameBuffer.strideV, dstY, + dstStrideY, dstUV, dstStrideUV, frameBuffer.width, frameBuffer.height); + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); + if (ret) { + RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret; + return false; + } + return true; +} + +CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) { + if (!pixel_buffer_pool) { + RTC_LOG(LS_ERROR) << "Failed to get pixel buffer pool."; + return nullptr; + } + CVPixelBufferRef pixel_buffer; + CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, + &pixel_buffer); + if (ret != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret; + // We probably want to drop frames here, since failure probably means + // that the pool is empty. 
+ return nullptr; + } + return pixel_buffer; +} + +// This is the callback function that VideoToolbox calls when encode is +// complete. From inspection this happens on its own queue. +void compressionOutputCallback(void* encoder, + void* params, + OSStatus status, + VTEncodeInfoFlags infoFlags, + CMSampleBufferRef sampleBuffer) { + RTC_CHECK(params); + std::unique_ptr encodeParams( + reinterpret_cast(params)); + RTC_CHECK(encodeParams->encoder); + [encodeParams->encoder frameWasEncoded:status + flags:infoFlags + sampleBuffer:sampleBuffer + width:encodeParams->width + height:encodeParams->height + renderTimeMs:encodeParams->render_time_ms + timestamp:encodeParams->timestamp + rotation:encodeParams->rotation]; +} +} // namespace + +@implementation RTCVideoEncoderH265 { + RTCVideoCodecInfo* _codecInfo; + std::unique_ptr _bitrateAdjuster; + uint32_t _targetBitrateBps; + uint32_t _encoderBitrateBps; + CFStringRef _profile; + RTCVideoEncoderCallback _callback; + int32_t _width; + int32_t _height; + VTCompressionSessionRef _compressionSession; + RTCVideoCodecMode _mode; + int framesLeft; + + std::vector _nv12ScaleBuffer; +} + +// .5 is set as a mininum to prevent overcompensating for large temporary +// overshoots. We don't want to degrade video quality too badly. +// .95 is set to prevent oscillations. When a lower bitrate is set on the +// encoder than previously set, its output seems to have a brief period of +// drastically reduced bitrate, so we want to avoid that. In steady state +// conditions, 0.95 seems to give us better overall bitrate over long periods +// of time. 
+- (instancetype)initWithCodecInfo:(RTCVideoCodecInfo*)codecInfo { + if (self = [super init]) { + _codecInfo = codecInfo; + _bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95)); + RTC_CHECK([codecInfo.name isEqualToString:@"H265"]); + +#if defined(WEBRTC_IOS) + [RTCUIApplicationStatusObserver prepareForUse]; +#endif + } + return self; +} + +- (void)dealloc { + [self destroyCompressionSession]; +} + +- (NSInteger)startEncodeWithSettings:(RTCVideoEncoderSettings*)settings + numberOfCores:(int)numberOfCores { + RTC_DCHECK(settings); + RTC_DCHECK([settings.name isEqualToString:@"H265"]); + + _width = settings.width; + _height = settings.height; + _mode = settings.mode; + + // We can only set average bitrate on the HW encoder. + _targetBitrateBps = settings.startBitrate; + _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); + + // TODO(tkchin): Try setting payload size via + // kVTCompressionPropertyKey_Maxh265SliceBytes. + + return [self resetCompressionSession]; +} + +- (NSInteger)encode:(RTCVideoFrame*)frame + codecSpecificInfo:(id)codecSpecificInfo + frameTypes:(NSArray*)frameTypes { + RTC_DCHECK_EQ(frame.width, _width); + RTC_DCHECK_EQ(frame.height, _height); + if (!_callback || !_compressionSession) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } +#if defined(WEBRTC_IOS) + if (![[RTCUIApplicationStatusObserver sharedInstance] isApplicationActive]) { + // Ignore all encode requests when app isn't active. In this state, the + // hardware encoder has been invalidated by the OS. + return WEBRTC_VIDEO_CODEC_OK; + } +#endif + BOOL isKeyframeRequired = NO; + + // Get a pixel buffer from the pool and copy frame data over. + CVPixelBufferPoolRef pixelBufferPool = + VTCompressionSessionGetPixelBufferPool(_compressionSession); + +#if defined(WEBRTC_IOS) + if (!pixelBufferPool) { + // Kind of a hack. 
On backgrounding, the compression session seems to get + // invalidated, which causes this pool call to fail when the application + // is foregrounded and frames are being sent for encoding again. + // Resetting the session when this happens fixes the issue. + // In addition we request a keyframe so video can recover quickly. + [self resetCompressionSession]; + pixelBufferPool = + VTCompressionSessionGetPixelBufferPool(_compressionSession); + isKeyframeRequired = YES; + RTC_LOG(LS_INFO) << "Resetting compression session due to invalid pool."; + } +#endif + + CVPixelBufferRef pixelBuffer = nullptr; + if ([frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) { + // Native frame buffer + RTCCVPixelBuffer* rtcPixelBuffer = (RTCCVPixelBuffer*)frame.buffer; + if (![rtcPixelBuffer requiresCropping]) { + // This pixel buffer might have a higher resolution than what the + // compression session is configured to. The compression session can + // handle that and will output encoded frames in the configured + // resolution regardless of the input pixel buffer resolution. + pixelBuffer = rtcPixelBuffer.pixelBuffer; + CVBufferRetain(pixelBuffer); + } else { + // Cropping required, we need to crop and scale to a new pixel buffer. 
+ pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + int dstWidth = CVPixelBufferGetWidth(pixelBuffer); + int dstHeight = CVPixelBufferGetHeight(pixelBuffer); + if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) { + int size = + [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth + height:dstHeight]; + _nv12ScaleBuffer.resize(size); + } else { + _nv12ScaleBuffer.clear(); + } + _nv12ScaleBuffer.shrink_to_fit(); + if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer + withTempBuffer:_nv12ScaleBuffer.data()]) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + } + + if (!pixelBuffer) { + // We did not have a native frame buffer + pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(pixelBuffer); + if (!CopyVideoFrameToPixelBuffer([frame.buffer toI420], pixelBuffer)) { + RTC_LOG(LS_ERROR) << "Failed to copy frame data."; + CVBufferRelease(pixelBuffer); + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + + // Check if we need a keyframe. + if (!isKeyframeRequired && frameTypes) { + for (NSNumber* frameType in frameTypes) { + if ((RTCFrameType)frameType.intValue == RTCFrameTypeVideoFrameKey) { + isKeyframeRequired = YES; + break; + } + } + } + + CMTime presentationTimeStamp = + CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000); + CFDictionaryRef frameProperties = nullptr; + if (isKeyframeRequired) { + CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame}; + CFTypeRef values[] = {kCFBooleanTrue}; + frameProperties = CreateCFTypeDictionary(keys, values, 1); + } + + std::unique_ptr encodeParams; + encodeParams.reset(new RTCFrameEncodeParams( + self, _width, _height, frame.timeStampNs / rtc::kNumNanosecsPerMillisec, + frame.timeStamp, frame.rotation)); + + // Update the bitrate if needed. 
+ [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()]; + + OSStatus status = VTCompressionSessionEncodeFrame( + _compressionSession, pixelBuffer, presentationTimeStamp, kCMTimeInvalid, + frameProperties, encodeParams.release(), nullptr); + if (frameProperties) { + CFRelease(frameProperties); + } + if (pixelBuffer) { + CVBufferRelease(pixelBuffer); + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to encode frame with code: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)setCallback:(RTCVideoEncoderCallback)callback { + _callback = callback; +} + +- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate { + _targetBitrateBps = 1000 * bitrateKbit; + _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); + [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()]; + return WEBRTC_VIDEO_CODEC_OK; +} + +#pragma mark - Private + +- (NSInteger)releaseEncoder { + // Need to destroy so that the session is invalidated and won't use the + // callback anymore. Do not remove callback until the session is invalidated + // since async encoder callbacks can occur until invalidation. + [self destroyCompressionSession]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (int)resetCompressionSession { + [self destroyCompressionSession]; + + // Set source image buffer attributes. These attributes will be present on + // buffers retrieved from the encoder's pixel buffer pool. 
+ const size_t attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#elif defined(WEBRTC_MAC) + kCVPixelBufferOpenGLCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, + kCVPixelBufferPixelFormatTypeKey + }; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = + CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, + pixelFormat}; + CFDictionaryRef sourceAttributes = + CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + CFMutableDictionaryRef encoder_specs = nullptr; +#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) + // Currently hw accl is supported above 360p on mac, below 360p + // the compression session will be created with hw accl disabled. 
+ encoder_specs = + CFDictionaryCreateMutable(nullptr, 1, &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + CFDictionarySetValue( + encoder_specs, + kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, + kCFBooleanTrue); +#endif + OSStatus status = VTCompressionSessionCreate( + nullptr, // use default allocator + _width, _height, kCMVideoCodecType_HEVC, + encoder_specs, // use hardware accelerated encoder if available + sourceAttributes, + nullptr, // use default compressed data allocator + compressionOutputCallback, nullptr, &_compressionSession); + if (sourceAttributes) { + CFRelease(sourceAttributes); + sourceAttributes = nullptr; + } + if (encoder_specs) { + CFRelease(encoder_specs); + encoder_specs = nullptr; + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } +#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) + CFBooleanRef hwaccl_enabled = nullptr; + status = VTSessionCopyProperty( + _compressionSession, + kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, nullptr, + &hwaccl_enabled); + if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) { + RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled"; + } else { + RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled"; + } +#endif + [self configureCompressionSession]; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureCompressionSession { + RTC_DCHECK(_compressionSession); + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, + false); + // SetVTSessionProperty(_compressionSession, + // kVTCompressionPropertyKey_ProfileLevel, _profile); + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_AllowFrameReordering, false); + [self setEncoderBitrateBps:_targetBitrateBps]; + // TODO(tkchin): Look at entropy mode and colorspace matrices. 
+ // TODO(tkchin): Investigate to see if there's any way to make this work. + // May need it to interop with Android. Currently this call just fails. + // On inspecting encoder output on iOS8, this value is set to 6. + // internal::SetVTSessionProperty(compression_session_, + // kVTCompressionPropertyKey_MaxFrameDelayCount, + // 1); + + // Set a relatively large value for keyframe emission (7200 frames or 4 + // minutes). + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_MaxKeyFrameInterval, 7200); + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, + 240); + OSStatus status = + VTCompressionSessionPrepareToEncodeFrames(_compressionSession); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Compression session failed to prepare encode frames."; + } +} + +- (void)destroyCompressionSession { + if (_compressionSession) { + VTCompressionSessionInvalidate(_compressionSession); + CFRelease(_compressionSession); + _compressionSession = nullptr; + } +} + +- (NSString*)implementationName { + return @"VideoToolbox"; +} + +- (void)setBitrateBps:(uint32_t)bitrateBps { + if (_encoderBitrateBps != bitrateBps) { + [self setEncoderBitrateBps:bitrateBps]; + } +} + +- (void)setEncoderBitrateBps:(uint32_t)bitrateBps { + if (_compressionSession) { + SetVTSessionProperty(_compressionSession, + kVTCompressionPropertyKey_AverageBitRate, bitrateBps); + + // TODO(tkchin): Add a helper method to set array value. 
+ int64_t dataLimitBytesPerSecondValue = + static_cast(bitrateBps * kLimitToAverageBitRateFactor / 8); + CFNumberRef bytesPerSecond = + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, + &dataLimitBytesPerSecondValue); + int64_t oneSecondValue = 1; + CFNumberRef oneSecond = CFNumberCreate( + kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue); + const void* nums[2] = {bytesPerSecond, oneSecond}; + CFArrayRef dataRateLimits = + CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks); + OSStatus status = VTSessionSetProperty( + _compressionSession, kVTCompressionPropertyKey_DataRateLimits, + dataRateLimits); + if (bytesPerSecond) { + CFRelease(bytesPerSecond); + } + if (oneSecond) { + CFRelease(oneSecond); + } + if (dataRateLimits) { + CFRelease(dataRateLimits); + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to set data rate limit"; + } + + _encoderBitrateBps = bitrateBps; + } +} + +- (void)frameWasEncoded:(OSStatus)status + flags:(VTEncodeInfoFlags)infoFlags + sampleBuffer:(CMSampleBufferRef)sampleBuffer + width:(int32_t)width + height:(int32_t)height + renderTimeMs:(int64_t)renderTimeMs + timestamp:(uint32_t)timestamp + rotation:(RTCVideoRotation)rotation { + if (status != noErr) { + RTC_LOG(LS_ERROR) << "h265 encode failed."; + return; + } + if (infoFlags & kVTEncodeInfo_FrameDropped) { + RTC_LOG(LS_INFO) << "h265 encoder dropped a frame."; + return; + } + + BOOL isKeyframe = NO; + CFArrayRef attachments = + CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0); + if (attachments != nullptr && CFArrayGetCount(attachments)) { + CFDictionaryRef attachment = + static_cast(CFArrayGetValueAtIndex(attachments, 0)); + isKeyframe = + !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync); + } + + if (isKeyframe) { + RTC_LOG(LS_INFO) << "Generated keyframe"; + } + + // Convert the sample buffer into a buffer suitable for RTP packetization. + // TODO(tkchin): Allocate buffers through a pool. 
+  std::unique_ptr<rtc::Buffer> buffer(new rtc::Buffer());
+  RTCRtpFragmentationHeader* header;
+  {
+    std::unique_ptr<webrtc::RTPFragmentationHeader> header_cpp;
+    bool result = H265CMSampleBufferToAnnexBBuffer(sampleBuffer, isKeyframe,
+                                                   buffer.get(), &header_cpp);
+    header = [[RTCRtpFragmentationHeader alloc]
+        initWithNativeFragmentationHeader:header_cpp.get()];
+    if (!result) {
+      RTC_LOG(LS_ERROR) << "Failed to convert sample buffer.";
+      return;
+    }
+  }
+
+  RTCEncodedImage* frame = [[RTCEncodedImage alloc] init];
+  frame.buffer = [NSData dataWithBytesNoCopy:buffer->data()
+                                      length:buffer->size()
+                                freeWhenDone:NO];
+  frame.encodedWidth = width;
+  frame.encodedHeight = height;
+  frame.completeFrame = YES;
+  frame.frameType =
+      isKeyframe ? RTCFrameTypeVideoFrameKey : RTCFrameTypeVideoFrameDelta;
+  frame.captureTimeMs = renderTimeMs;
+  frame.timeStamp = timestamp;
+  frame.rotation = rotation;
+  frame.contentType = (_mode == RTCVideoCodecModeScreensharing)
+                          ? RTCVideoContentTypeScreenshare
+                          : RTCVideoContentTypeUnspecified;
+  frame.flags = webrtc::VideoSendTiming::kInvalid;
+
+  // TODO: QP is ignored because there is no H.265 bitstream parser.
+ + BOOL res = _callback(frame, [[RTCCodecSpecificInfoH265 alloc] init], header); + if (!res) { + RTC_LOG(LS_ERROR) << "Encode callback failed."; + return; + } + _bitrateAdjuster->Update(frame.buffer.length); +} + +- (RTCVideoEncoderQpThresholds*)scalingSettings { + return [[RTCVideoEncoderQpThresholds alloc] + initWithThresholdsLow:kLowh265QpThreshold + high:kHighh265QpThreshold]; +} + +@end diff --git a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc index 479ef3c6bba..8c8e2abdd5e 100644 --- a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc +++ b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc @@ -248,6 +248,236 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, return true; } + +bool H265CMSampleBufferToAnnexBBuffer( + CMSampleBufferRef hvcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer, + std::unique_ptr *out_header) { + RTC_DCHECK(hvcc_sample_buffer); + RTC_DCHECK(out_header); + out_header->reset(nullptr); + + // Get format description from the sample buffer. + CMVideoFormatDescriptionRef description = + CMSampleBufferGetFormatDescription(hvcc_sample_buffer); + if (description == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to get sample buffer's description."; + return false; + } + + // Get parameter set information. + int nalu_header_size = 0; + size_t param_set_count = 0; + OSStatus status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( + description, 0, nullptr, nullptr, ¶m_set_count, &nalu_header_size); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get parameter set."; + return false; + } + RTC_CHECK_EQ(nalu_header_size, kAvccHeaderByteSize); + RTC_DCHECK_EQ(param_set_count, 3); + + // Truncate any previous data in the buffer without changing its capacity. 
+ annexb_buffer->SetSize(0); + + size_t nalu_offset = 0; + std::vector frag_offsets; + std::vector frag_lengths; + + // Place all parameter sets at the front of buffer. + if (is_keyframe) { + size_t param_set_size = 0; + const uint8_t* param_set = nullptr; + for (size_t i = 0; i < param_set_count; ++i) { + status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( + description, i, ¶m_set, ¶m_set_size, nullptr, nullptr); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get parameter set."; + return false; + } + // Update buffer. + annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes)); + annexb_buffer->AppendData(reinterpret_cast(param_set), + param_set_size); + // Update fragmentation. + frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes)); + frag_lengths.push_back(param_set_size); + nalu_offset += sizeof(kAnnexBHeaderBytes) + param_set_size; + } + } + + // Get block buffer from the sample buffer. + CMBlockBufferRef block_buffer = + CMSampleBufferGetDataBuffer(hvcc_sample_buffer); + if (block_buffer == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to get sample buffer's block buffer."; + return false; + } + CMBlockBufferRef contiguous_buffer = nullptr; + // Make sure block buffer is contiguous. + if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) { + status = CMBlockBufferCreateContiguous( + nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: " + << status; + return false; + } + } else { + contiguous_buffer = block_buffer; + // Retain to make cleanup easier. + CFRetain(contiguous_buffer); + block_buffer = nullptr; + } + + // Now copy the actual data. 
+ char* data_ptr = nullptr; + size_t block_buffer_size = CMBlockBufferGetDataLength(contiguous_buffer); + status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, nullptr, + &data_ptr); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get block buffer data."; + CFRelease(contiguous_buffer); + return false; + } + size_t bytes_remaining = block_buffer_size; + while (bytes_remaining > 0) { + // The size type here must match |nalu_header_size|, we expect 4 bytes. + // Read the length of the next packet of data. Must convert from big endian + // to host endian. + RTC_DCHECK_GE(bytes_remaining, (size_t)nalu_header_size); + uint32_t* uint32_data_ptr = reinterpret_cast(data_ptr); + uint32_t packet_size = CFSwapInt32BigToHost(*uint32_data_ptr); + // Update buffer. + annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes)); + annexb_buffer->AppendData(data_ptr + nalu_header_size, packet_size); + // Update fragmentation. + frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes)); + frag_lengths.push_back(packet_size); + nalu_offset += sizeof(kAnnexBHeaderBytes) + packet_size; + + size_t bytes_written = packet_size + sizeof(kAnnexBHeaderBytes); + bytes_remaining -= bytes_written; + data_ptr += bytes_written; + } + RTC_DCHECK_EQ(bytes_remaining, (size_t)0); + + std::unique_ptr header(new RTPFragmentationHeader()); + header->VerifyAndAllocateFragmentationHeader(frag_offsets.size()); + RTC_DCHECK_EQ(frag_lengths.size(), frag_offsets.size()); + for (size_t i = 0; i < frag_offsets.size(); ++i) { + header->fragmentationOffset[i] = frag_offsets[i]; + header->fragmentationLength[i] = frag_lengths[i]; + header->fragmentationPlType[i] = 0; + header->fragmentationTimeDiff[i] = 0; + } + *out_header = std::move(header); + CFRelease(contiguous_buffer); + return true; +} + +bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, + size_t annexb_buffer_size, + CMVideoFormatDescriptionRef video_format, + CMSampleBufferRef* 
out_sample_buffer) {
+  RTC_DCHECK(annexb_buffer);
+  RTC_DCHECK(out_sample_buffer);
+  RTC_DCHECK(video_format);
+  *out_sample_buffer = nullptr;
+
+  AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
+  if (reader.SeekToNextNaluOfType(H265::kVps)) {
+    // Buffer contains a VPS NALU - skip it and the following SPS and PPS
+    const uint8_t* data;
+    size_t data_len;
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read VPS";
+      return false;
+    }
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read SPS";
+      return false;
+    }
+    if (!reader.ReadNalu(&data, &data_len)) {
+      RTC_LOG(LS_ERROR) << "Failed to read PPS";
+      return false;
+    }
+    if (reader.SeekToNextNaluOfType(H265::kPrefixSei)) {
+      if (!reader.ReadNalu(&data, &data_len)) {
+        RTC_LOG(LS_ERROR) << "Failed to read SEI";
+        return false;
+      }
+    }
+  } else {
+    // No VPS NALU - start reading from the first NALU in the buffer
+    reader.SeekToStart();
+  }
+
+  // Allocate memory as a block buffer.
+  // TODO(tkchin): figure out how to use a pool.
+  CMBlockBufferRef block_buffer = nullptr;
+  OSStatus status = CMBlockBufferCreateWithMemoryBlock(
+      nullptr, nullptr, reader.BytesRemaining(), nullptr, nullptr, 0,
+      reader.BytesRemaining(), kCMBlockBufferAssureMemoryNowFlag,
+      &block_buffer);
+  if (status != kCMBlockBufferNoErr) {
+    RTC_LOG(LS_ERROR) << "Failed to create block buffer.";
+    return false;
+  }
+
+  // Make sure block buffer is contiguous.
+  CMBlockBufferRef contiguous_buffer = nullptr;
+  if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) {
+    status = CMBlockBufferCreateContiguous(
+        nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer);
+    if (status != noErr) {
+      RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: "
+                        << status;
+      CFRelease(block_buffer);
+      return false;
+    }
+  } else {
+    contiguous_buffer = block_buffer;
+    block_buffer = nullptr;
+  }
+
+  // Get a raw pointer into allocated memory.
+ size_t block_buffer_size = 0; + char* data_ptr = nullptr; + status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, + &block_buffer_size, &data_ptr); + if (status != kCMBlockBufferNoErr) { + RTC_LOG(LS_ERROR) << "Failed to get block buffer data pointer."; + CFRelease(contiguous_buffer); + return false; + } + RTC_DCHECK(block_buffer_size == reader.BytesRemaining()); + + // Write Avcc NALUs into block buffer memory. + AvccBufferWriter writer(reinterpret_cast(data_ptr), + block_buffer_size); + while (reader.BytesRemaining() > 0) { + const uint8_t* nalu_data_ptr = nullptr; + size_t nalu_data_size = 0; + if (reader.ReadNalu(&nalu_data_ptr, &nalu_data_size)) { + writer.WriteNalu(nalu_data_ptr, nalu_data_size); + } + } + + // Create sample buffer. + status = CMSampleBufferCreate(nullptr, contiguous_buffer, true, nullptr, + nullptr, video_format, 1, 0, nullptr, 0, + nullptr, out_sample_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create sample buffer."; + CFRelease(contiguous_buffer); + return false; + } + CFRelease(contiguous_buffer); + return true; +} + CMVideoFormatDescriptionRef CreateVideoFormatDescription( const uint8_t* annexb_buffer, size_t annexb_buffer_size) { @@ -278,6 +508,41 @@ CMVideoFormatDescriptionRef CreateVideoFormatDescription( return description; } +CMVideoFormatDescriptionRef CreateH265VideoFormatDescription( + const uint8_t* annexb_buffer, + size_t annexb_buffer_size) { + const uint8_t* param_set_ptrs[3] = {}; + size_t param_set_sizes[3] = {}; + AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size); + // Skip everyting before the VPS, then read the VPS, SPS and PPS + if (!reader.SeekToNextNaluOfType(H265::kVps)) { + return nullptr; + } + if (!reader.ReadNalu(¶m_set_ptrs[0], ¶m_set_sizes[0])) { + RTC_LOG(LS_ERROR) << "Failed to read VPS"; + return nullptr; + } + if (!reader.ReadNalu(¶m_set_ptrs[1], ¶m_set_sizes[1])) { + RTC_LOG(LS_ERROR) << "Failed to read SPS"; + return nullptr; + } + if 
(!reader.ReadNalu(&param_set_ptrs[2], &param_set_sizes[2])) {
+    RTC_LOG(LS_ERROR) << "Failed to read PPS";
+    return nullptr;
+  }
+
+  // Parse the VPS, SPS and PPS into a CMVideoFormatDescription.
+  CMVideoFormatDescriptionRef description = nullptr;
+  OSStatus status = CMVideoFormatDescriptionCreateFromHEVCParameterSets(
+      kCFAllocatorDefault, 3, param_set_ptrs, param_set_sizes, 4, nullptr,
+      &description);
+  if (status != noErr) {
+    RTC_LOG(LS_ERROR) << "Failed to create video format description.";
+    return nullptr;
+  }
+  return description;
+}
+
 AnnexBBufferReader::AnnexBBufferReader(const uint8_t* annexb_buffer,
                                        size_t length)
     : start_(annexb_buffer), length_(length) {
@@ -324,6 +589,15 @@ bool AnnexBBufferReader::SeekToNextNaluOfType(NaluType type) {
   }
   return false;
 }
+bool AnnexBBufferReader::SeekToNextNaluOfType(H265::NaluType type) {
+  for (; offset_ != offsets_.end(); ++offset_) {
+    if (offset_->payload_size < 1)
+      continue;
+    if (H265::ParseNaluType(*(start_ + offset_->payload_start_offset)) == type)
+      return true;
+  }
+  return false;
+}
 AvccBufferWriter::AvccBufferWriter(uint8_t* const avcc_buffer, size_t length)
     : start_(avcc_buffer), offset_(0), length_(length) {
   RTC_DCHECK(avcc_buffer);
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
index cd5a51079ab..4bb7097f8c7 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
+++ b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "common_video/h264/h264_common.h"
+#include "common_video/h265/h265_common.h"
 #include "modules/include/module_common_types.h"
 #include "rtc_base/buffer.h"
 
@@ -46,12 +47,36 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer,
                                       CMVideoFormatDescriptionRef video_format,
                                       CMSampleBufferRef* out_sample_buffer);
 
+// Converts a sample buffer emitted from the VideoToolbox encoder into a buffer
+// suitable for RTP.
The sample buffer is in hvcc format whereas the rtp buffer +// needs to be in Annex B format. Data is written directly to |annexb_buffer| +// and a new RTPFragmentationHeader is returned in |out_header|. +bool H265CMSampleBufferToAnnexBBuffer( + CMSampleBufferRef hvcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer, + std::unique_ptr *out_header); + +// Converts a buffer received from RTP into a sample buffer suitable for the +// VideoToolbox decoder. The RTP buffer is in annex b format whereas the sample +// buffer is in hvcc format. +// If |is_keyframe| is true then |video_format| is ignored since the format will +// be read from the buffer. Otherwise |video_format| must be provided. +// Caller is responsible for releasing the created sample buffer. +bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, + size_t annexb_buffer_size, + CMVideoFormatDescriptionRef video_format, + CMSampleBufferRef* out_sample_buffer); + // Returns a video format description created from the sps/pps information in // the Annex B buffer. If there is no such information, nullptr is returned. // The caller is responsible for releasing the description. CMVideoFormatDescriptionRef CreateVideoFormatDescription( const uint8_t* annexb_buffer, size_t annexb_buffer_size); +CMVideoFormatDescriptionRef CreateH265VideoFormatDescription( + const uint8_t* annexb_buffer, + size_t annexb_buffer_size); // Helper class for reading NALUs from an RTP Annex B buffer. class AnnexBBufferReader final { @@ -77,6 +102,7 @@ class AnnexBBufferReader final { // Return true if a NALU of the desired type is found, false if we // reached the end instead bool SeekToNextNaluOfType(H264::NaluType type); + bool SeekToNextNaluOfType(H265::NaluType type); private: // Returns the the next offset that contains NALU data. 
diff --git a/sdk/objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h b/sdk/objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h new file mode 100644 index 00000000000..2d340a50237 --- /dev/null +++ b/sdk/objc/Framework/Headers/WebRTC/RTCVideoCodecH265.h @@ -0,0 +1,43 @@ +/* + * Copyright 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* This file is borrowed from webrtc/sdk/objc/Framework/Headers/WebRTC/RTCVideoCodecH264.h */ + +#import + +#import +#import + +RTC_EXPORT +@interface RTCCodecSpecificInfoH265 : NSObject +@end + +/** Encoder. */ +RTC_EXPORT +@interface RTCVideoEncoderH265 : NSObject + +- (instancetype)initWithCodecInfo:(RTCVideoCodecInfo *)codecInfo; + +@end + +/** Decoder. */ +RTC_EXPORT +@interface RTCVideoDecoderH265 : NSObject +@end + +/** Encoder factory. */ +RTC_EXPORT +@interface RTCVideoEncoderFactoryH265 : NSObject +@end + +/** Decoder factory. */ +RTC_EXPORT +@interface RTCVideoDecoderFactoryH265 : NSObject +@end diff --git a/test/video_codec_settings.h b/test/video_codec_settings.h index 9ee6be222cd..4fde21b17bb 100644 --- a/test/video_codec_settings.h +++ b/test/video_codec_settings.h @@ -60,6 +60,10 @@ static void CodecSettings(VideoCodecType codec_type, VideoCodec* settings) { // TODO(brandtr): Set |qpMax| here, when the OpenH264 wrapper supports it. *(settings->H264()) = VideoEncoder::GetDefaultH264Settings(); return; + case kVideoCodecH265: + settings->codecType = kVideoCodecH265; + *(settings->H265()) = VideoEncoder::GetDefaultH265Settings(); + return; case kVideoCodecI420: // Bitrate needed for this size and framerate. settings->startBitrate =