From 3f7852105feb9047a9106984df70aa3570ca9dec Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Thu, 24 Aug 2023 19:25:15 +0900 Subject: [PATCH] Expose remote audio sample buffers on RTCAudioTrack (#84) * audio renderer protocol * basic set up * progress * update header year * impl * stereo * fail gracefully * minor fix * doc * optimize AudioStreamBasicDescription * logging * minor refactoring * weak reference to delegates * fix timestamp computation * change swift delegate signature * use os_unfair_lock instead * avoid deadlock --- sdk/BUILD.gn | 3 + .../peerconnection/RTCAudioTrack+Private.h | 10 +- sdk/objc/api/peerconnection/RTCAudioTrack.h | 8 + sdk/objc/api/peerconnection/RTCAudioTrack.mm | 215 +++++++++++++++++- sdk/objc/base/RTCAudioRenderer.h | 34 +++ 5 files changed, 264 insertions(+), 6 deletions(-) create mode 100644 sdk/objc/base/RTCAudioRenderer.h diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index 4b0f7efe89..fda1f3986f 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -119,6 +119,7 @@ if (is_ios || is_mac) { "objc/base/RTCVideoFrame.mm", "objc/base/RTCVideoFrameBuffer.h", "objc/base/RTCVideoRenderer.h", + "objc/base/RTCAudioRenderer.h", "objc/base/RTCYUVPlanarBuffer.h", ] @@ -1289,6 +1290,7 @@ if (is_ios || is_mac) { "objc/base/RTCVideoFrame.h", "objc/base/RTCVideoFrameBuffer.h", "objc/base/RTCVideoRenderer.h", + "objc/base/RTCAudioRenderer.h", "objc/base/RTCYUVPlanarBuffer.h", "objc/components/audio/RTCAudioDevice.h", "objc/components/audio/RTCAudioSession.h", @@ -1492,6 +1494,7 @@ if (is_ios || is_mac) { "objc/base/RTCVideoFrame.h", "objc/base/RTCVideoFrameBuffer.h", "objc/base/RTCVideoRenderer.h", + "objc/base/RTCAudioRenderer.h", "objc/base/RTCYUVPlanarBuffer.h", "objc/components/capturer/RTCCameraVideoCapturer.h", "objc/components/capturer/RTCFileVideoCapturer.h", diff --git a/sdk/objc/api/peerconnection/RTCAudioTrack+Private.h b/sdk/objc/api/peerconnection/RTCAudioTrack+Private.h index 6495500484..38c0bd3b1b 100644 --- a/sdk/objc/api/peerconnection/RTCAudioTrack+Private.h +++ b/sdk/objc/api/peerconnection/RTCAudioTrack+Private.h @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#import #import "RTCAudioTrack.h" #include "api/media_stream_interface.h" @@ -15,17 +16,18 @@ NS_ASSUME_NONNULL_BEGIN @class RTC_OBJC_TYPE(RTCPeerConnectionFactory); -@interface RTC_OBJC_TYPE (RTCAudioTrack) -() +@interface RTC_OBJC_TYPE (RTCAudioTrack) () - /** AudioTrackInterface created or passed in at construction. */ - @property(nonatomic, readonly) rtc::scoped_refptr nativeAudioTrack; +/** AudioTrackInterface created or passed in at construction. */ +@property(nonatomic, readonly) rtc::scoped_refptr nativeAudioTrack; /** Initialize an RTCAudioTrack with an id. */ - (instancetype)initWithFactory:(RTC_OBJC_TYPE(RTCPeerConnectionFactory) *)factory source:(RTC_OBJC_TYPE(RTCAudioSource) *)source trackId:(NSString *)trackId; +- (void)didCaptureSampleBuffer:(CMSampleBufferRef)sampleBuffer; + @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/api/peerconnection/RTCAudioTrack.h b/sdk/objc/api/peerconnection/RTCAudioTrack.h index 95eb5d3d48..c8218ad926 100644 --- a/sdk/objc/api/peerconnection/RTCAudioTrack.h +++ b/sdk/objc/api/peerconnection/RTCAudioTrack.h @@ -13,6 +13,7 @@ NS_ASSUME_NONNULL_BEGIN +@protocol RTC_OBJC_TYPE (RTCAudioRenderer); @class RTC_OBJC_TYPE(RTCAudioSource); RTC_OBJC_EXPORT @@ -23,6 +24,13 @@ RTC_OBJC_EXPORT /** The audio source for this audio track. */ @property(nonatomic, readonly) RTC_OBJC_TYPE(RTCAudioSource) * source; +/** Register a renderer that will receive all audio CMSampleBuffers on this track. + * Does not retain. */ +- (void)addRenderer:(id)renderer; + +/** Deregister a renderer */ +- (void)removeRenderer:(id)renderer; + @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/api/peerconnection/RTCAudioTrack.mm b/sdk/objc/api/peerconnection/RTCAudioTrack.mm index 5c1736f436..065064a1fe 100644 --- a/sdk/objc/api/peerconnection/RTCAudioTrack.mm +++ b/sdk/objc/api/peerconnection/RTCAudioTrack.mm @@ -8,8 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. */ +#import +#import + #import "RTCAudioTrack+Private.h" +#import "RTCAudioRenderer.h" #import "RTCAudioSource+Private.h" #import "RTCMediaStreamTrack+Private.h" #import "RTCPeerConnectionFactory+Private.h" @@ -17,7 +21,167 @@ #include "rtc_base/checks.h" -@implementation RTC_OBJC_TYPE (RTCAudioTrack) +namespace webrtc { +/** + * Captures audio data and converts to CMSampleBuffers + */ +class AudioSinkConverter : public rtc::RefCountInterface, public webrtc::AudioTrackSinkInterface { + private: + os_unfair_lock *lock_; + __weak RTCAudioTrack *audio_track_; + int64_t total_frames_ = 0; + bool attached_ = false; + + public: + AudioSinkConverter(RTCAudioTrack *audioTrack, os_unfair_lock *lock) { + RTC_LOG(LS_INFO) << "RTCAudioTrack.AudioSinkConverter init"; + audio_track_ = audioTrack; + lock_ = lock; + } + + ~AudioSinkConverter() { + // + RTC_LOG(LS_INFO) << "RTCAudioTrack.AudioSinkConverter dealloc"; + } + + // Must be called while locked + void TryAttach() { + if (attached_) { + // Already attached + return; + } + RTC_LOG(LS_INFO) << "RTCAudioTrack attaching sink..."; + // Reset for creating CMSampleTimingInfo correctly + audio_track_.nativeAudioTrack->AddSink(this); + total_frames_ = 0; + attached_ = true; + } + + // Must be called while locked + void TryDetach() { + if (!attached_) { + // Already detached + return; + } + RTC_LOG(LS_INFO) << "RTCAudioTrack detaching sink..."; + audio_track_.nativeAudioTrack->RemoveSink(this); + attached_ = false; + } + + void OnData(const void *audio_data, + int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t number_of_frames, + absl::optional absolute_capture_timestamp_ms) override { + RTC_LOG(LS_INFO) << "RTCAudioTrack.AudioSinkConverter OnData bits_per_sample: " + << bits_per_sample << " sample_rate: " << sample_rate + << " number_of_channels: " << number_of_channels + << " number_of_frames: " << number_of_frames + << " absolute_capture_timestamp_ms: " + << (absolute_capture_timestamp_ms ? absolute_capture_timestamp_ms.value() : 0); + + bool is_locked = os_unfair_lock_trylock(lock_); + if (!is_locked) { + RTC_LOG(LS_INFO) << "RTCAudioTrack.AudioSinkConverter OnData already locked, skipping..."; + return; + } + bool is_attached = attached_; + os_unfair_lock_unlock(lock_); + + if (!is_attached) { + RTC_LOG(LS_INFO) << "RTCAudioTrack.AudioSinkConverter OnData already detached, skipping..."; + return; + } + + /* + * Convert to CMSampleBuffer + */ + + if (!(number_of_channels == 1 || number_of_channels == 2)) { + NSLog(@"RTCAudioTrack: Only mono or stereo is supported currently. numberOfChannels: %zu", + number_of_channels); + return; + } + + OSStatus status; + + AudioChannelLayout acl; + bzero(&acl, sizeof(acl)); + acl.mChannelLayoutTag = + number_of_channels == 2 ? kAudioChannelLayoutTag_Stereo : kAudioChannelLayoutTag_Mono; + + AudioStreamBasicDescription sd; + sd.mSampleRate = sample_rate; + sd.mFormatID = kAudioFormatLinearPCM; + sd.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + sd.mFramesPerPacket = 1; + sd.mChannelsPerFrame = number_of_channels; + sd.mBitsPerChannel = bits_per_sample; /* 16 */ + sd.mBytesPerFrame = sd.mChannelsPerFrame * (sd.mBitsPerChannel / 8); + sd.mBytesPerPacket = sd.mBytesPerFrame; + + CMSampleTimingInfo timing = { + CMTimeMake(1, sample_rate), + CMTimeMake(total_frames_, sample_rate), + kCMTimeInvalid, + }; + + total_frames_ += number_of_frames; // update the total + + CMFormatDescriptionRef format = NULL; + status = CMAudioFormatDescriptionCreate( + kCFAllocatorDefault, &sd, sizeof(acl), &acl, 0, NULL, NULL, &format); + + if (status != 0) { + NSLog(@"RTCAudioTrack: Failed to create audio format description"); + return; + } + + CMSampleBufferRef buffer; + status = CMSampleBufferCreate(kCFAllocatorDefault, + NULL, + false, + NULL, + NULL, + format, + (CMItemCount)number_of_frames, + 1, + &timing, + 0, + NULL, + &buffer); + if (status != 0) { + NSLog(@"RTCAudioTrack: Failed to allocate sample buffer"); + return; + } + + AudioBufferList bufferList; + bufferList.mNumberBuffers = 1; + bufferList.mBuffers[0].mNumberChannels = sd.mChannelsPerFrame; + bufferList.mBuffers[0].mDataByteSize = (UInt32)(number_of_frames * sd.mBytesPerFrame); + bufferList.mBuffers[0].mData = (void *)audio_data; + status = CMSampleBufferSetDataBufferFromAudioBufferList( + buffer, kCFAllocatorDefault, kCFAllocatorDefault, 0, &bufferList); + if (status != 0) { + NSLog(@"RTCAudioTrack: Failed to convert audio buffer list into sample buffer"); + return; + } + + // Report back to RTCAudioTrack + [audio_track_ didCaptureSampleBuffer:buffer]; + + CFRelease(buffer); + } +}; +} // namespace webrtc + +@implementation RTC_OBJC_TYPE (RTCAudioTrack) { + rtc::scoped_refptr _audioConverter; + // Stores weak references to renderers + NSHashTable *_renderers; + os_unfair_lock _lock; +} @synthesize source = _source; @@ -43,7 +207,21 @@ - (instancetype)initWithFactory:(RTC_OBJC_TYPE(RTCPeerConnectionFactory) *)facto NSParameterAssert(factory); NSParameterAssert(nativeTrack); NSParameterAssert(type == RTCMediaStreamTrackTypeAudio); - return [super initWithFactory:factory nativeTrack:nativeTrack type:type]; + if (self = [super initWithFactory:factory nativeTrack:nativeTrack type:type]) { + RTC_LOG(LS_INFO) << "RTCAudioTrack init"; + _renderers = [NSHashTable weakObjectsHashTable]; + _audioConverter = new rtc::RefCountedObject(self, &_lock); + } + + return self; +} + +- (void)dealloc { + os_unfair_lock_lock(&_lock); + _audioConverter->TryDetach(); + os_unfair_lock_unlock(&_lock); + + RTC_LOG(LS_INFO) << "RTCAudioTrack dealloc"; } - (RTC_OBJC_TYPE(RTCAudioSource) *)source { @@ -57,6 +235,25 @@ - (instancetype)initWithFactory:(RTC_OBJC_TYPE(RTCPeerConnectionFactory) *)facto return _source; } +- (void)addRenderer:(id)renderer { + os_unfair_lock_lock(&_lock); + [_renderers addObject:renderer]; + _audioConverter->TryAttach(); + os_unfair_lock_unlock(&_lock); +} + +- (void)removeRenderer:(id)renderer { + os_unfair_lock_lock(&_lock); + [_renderers removeObject:renderer]; + NSUInteger renderersCount = _renderers.allObjects.count; + + if (renderersCount == 0) { + // Detach if no more renderers... + _audioConverter->TryDetach(); + } + os_unfair_lock_unlock(&_lock); +} + #pragma mark - Private - (rtc::scoped_refptr)nativeAudioTrack { @@ -64,4 +261,18 @@ - (instancetype)initWithFactory:(RTC_OBJC_TYPE(RTCPeerConnectionFactory) *)facto static_cast(self.nativeTrack.get())); } +- (void)didCaptureSampleBuffer:(CMSampleBufferRef)sampleBuffer { + bool is_locked = os_unfair_lock_trylock(&_lock); + if (!is_locked) { + RTC_LOG(LS_INFO) << "RTCAudioTrack didCaptureSampleBuffer already locked, skipping..."; + return; + } + NSArray *renderers = [_renderers allObjects]; + os_unfair_lock_unlock(&_lock); + + for (id renderer in renderers) { + [renderer renderSampleBuffer:sampleBuffer]; + } +} + @end diff --git a/sdk/objc/base/RTCAudioRenderer.h b/sdk/objc/base/RTCAudioRenderer.h new file mode 100644 index 0000000000..def20eac3c --- /dev/null +++ b/sdk/objc/base/RTCAudioRenderer.h @@ -0,0 +1,34 @@ +/* + * Copyright 2023 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#import +#if TARGET_OS_IPHONE +#import +#endif + +#import "RTCMacros.h" + +NS_ASSUME_NONNULL_BEGIN + +RTC_OBJC_EXPORT @protocol RTC_OBJC_TYPE +(RTCAudioRenderer) + + - (void)renderSampleBuffer : (CMSampleBufferRef)sampleBuffer + NS_SWIFT_NAME(render(sampleBuffer:)); + +@end + +NS_ASSUME_NONNULL_END