
Commit 56e3bc2

Audio fixes (#1319)
* Audio fixes
  - For devices with >2 channels, only use first 2 when loading into ffmpeg
  - If output pipeline only has 1 audio source, don't use audio mixer
* add wrap_frame tests
* clear example
* add error context
* cleanup
* ensure with_max_channels creates new struct
1 parent ab6796d commit 56e3bc2
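In rough terms, the first bullet composes the two new AudioInfo helpers added in crates/media-info/src/lib.rs. A minimal caller-side sketch follows; the function name, the `cap_media_info` import path, and the surrounding pipeline are assumptions for illustration, not code from this commit:

    use cap_media_info::AudioInfo; // assumed crate name for crates/media-info

    // Hypothetical call site: cap a >2-channel capture device at stereo
    // before the samples reach ffmpeg.
    fn push_device_audio(device_info: &AudioInfo, packed_samples: &[u8]) {
        // Frame containing only the first two channels of the packed input.
        let frame = device_info.wrap_frame_with_max_channels(packed_samples, 2);
        // Matching AudioInfo copy with its channel count clamped to 2.
        let ffmpeg_info = device_info.with_max_channels(2);
        // ...hand `frame` / `ffmpeg_info` to the encoder here...
        let _ = (frame, ffmpeg_info);
    }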

7 files changed: 278 additions, 156 deletions


apps/web/app/s/[videoId]/_components/AuthOverlay.tsx

Lines changed: 1 addition & 1 deletion
@@ -10,8 +10,8 @@ import { signIn } from "next-auth/react";
 import { useId, useState } from "react";
 import { toast } from "sonner";
 import { trackEvent } from "@/app/utils/analytics";
-import OtpForm from "./OtpForm";
 import { usePublicEnv } from "@/utils/public-env";
+import OtpForm from "./OtpForm";
 
 interface AuthOverlayProps {
   isOpen: boolean;

apps/web/app/s/[videoId]/_components/Sidebar.tsx

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ export const Sidebar = forwardRef<{ scrollToBottom: () => void }, SidebarProps>(
       : !(
           videoSettings?.disableTranscript ??
           data.orgSettings?.disableTranscript
-      )
+        )
         ? "transcript"
         : "activity";

crates/media-info/src/lib.rs

Lines changed: 95 additions & 10 deletions
@@ -24,7 +24,7 @@ pub enum AudioInfoError {
 }
 
 impl AudioInfo {
-    pub const MAX_AUDIO_CHANNELS: u16 = 8;
+    pub const MAX_AUDIO_CHANNELS: u16 = 16;
 
     pub const fn new(
         sample_format: Sample,
@@ -133,29 +133,48 @@ impl AudioInfo {
         frame
     }
 
-    pub fn wrap_frame(&self, data: &[u8]) -> frame::Audio {
-        let sample_size = self.sample_size();
-        let interleaved_chunk_size = sample_size * self.channels;
-        let samples = data.len() / interleaved_chunk_size;
+    /// Always expects packed input data
+    pub fn wrap_frame_with_max_channels(
+        &self,
+        packed_data: &[u8],
+        max_channels: usize,
+    ) -> frame::Audio {
+        let out_channels = self.channels.min(max_channels);
 
-        let mut frame = frame::Audio::new(self.sample_format, samples, self.channel_layout());
+        let sample_size = self.sample_size();
+        let packed_sample_size = sample_size * self.channels;
+        let samples = packed_data.len() / packed_sample_size;
+
+        let mut frame = frame::Audio::new(
+            self.sample_format,
+            samples,
+            ChannelLayout::default(out_channels as i32),
+        );
         frame.set_rate(self.sample_rate);
 
         if self.channels == 0 {
             unreachable!()
-        } else if self.channels == 1 || frame.is_packed() {
-            frame.data_mut(0)[0..data.len()].copy_from_slice(data)
+        } else if self.channels == 1 || (frame.is_packed() && self.channels <= max_channels) {
+            frame.data_mut(0)[0..packed_data.len()].copy_from_slice(packed_data)
+        } else if frame.is_packed() && self.channels > out_channels {
+            for (chunk_index, packed_chunk) in packed_data.chunks(packed_sample_size).enumerate() {
+                let start = chunk_index * sample_size * out_channels;
+                let end = start + sample_size * out_channels;
+
+                frame.data_mut(0)[start..end].copy_from_slice(&packed_chunk[0..(end - start)]);
+            }
         } else {
             // cpal *always* returns interleaved data (i.e. the first sample from every channel, followed
             // by the second sample from every channel, et cetera). Many audio codecs work better/primarily
             // with planar data, so we de-interleave it here if there is more than one channel.
 
-            for (chunk_index, interleaved_chunk) in data.chunks(interleaved_chunk_size).enumerate()
+            for (chunk_index, interleaved_chunk) in
+                packed_data.chunks(packed_sample_size).enumerate()
             {
                 let start = chunk_index * sample_size;
                 let end = start + sample_size;
 
-                for channel in 0..self.channels {
+                for channel in 0..self.channels.min(max_channels) {
                     let channel_start = channel * sample_size;
                     let channel_end = channel_start + sample_size;
                     frame.data_mut(channel)[start..end]
@@ -166,6 +185,17 @@ impl AudioInfo {
 
         frame
     }
+
+    /// Always expects packed input data
+    pub fn wrap_frame(&self, data: &[u8]) -> frame::Audio {
+        self.wrap_frame_with_max_channels(data, self.channels)
+    }
+
+    pub fn with_max_channels(&self, channels: u16) -> Self {
+        let mut this = *self;
+        this.channels = this.channels.min(channels as usize);
+        this
+    }
 }
 
 pub enum RawVideoFormat {
@@ -292,3 +322,58 @@ pub fn ffmpeg_sample_format_for(sample_format: SampleFormat) -> Option<Sample> {
         _ => None,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    mod audio_info {
+        use super::*;
+
+        #[test]
+        fn wrap_packed_frame() {
+            let info = AudioInfo::new_raw(Sample::U8(Type::Packed), 2, 4);
+
+            let input = &[1, 2, 3, 4, 1, 2, 3, 4];
+            let frame = info.wrap_frame(input);
+
+            assert_eq!(&frame.data(0)[0..input.len()], input);
+        }
+
+        #[test]
+        fn wrap_planar_frame() {
+            let info = AudioInfo::new_raw(Sample::U8(Type::Planar), 2, 4);
+
+            let input = &[1, 2, 3, 4, 1, 2, 3, 4];
+            let frame = info.wrap_frame(input);
+
+            assert_eq!(frame.planes(), 4);
+            assert_eq!(&frame.data(0)[0..2], &[1, 1]);
+            assert_eq!(&frame.data(1)[0..2], &[2, 2]);
+            assert_eq!(&frame.data(2)[0..2], &[3, 3]);
+            assert_eq!(&frame.data(3)[0..2], &[4, 4]);
+        }
+
+        #[test]
+        fn wrap_packed_frame_max_channels() {
+            let info = AudioInfo::new_raw(Sample::U8(Type::Packed), 2, 4);
+
+            let input = &[1, 2, 3, 4, 1, 2, 3, 4];
+            let frame = info.wrap_frame_with_max_channels(input, 2);
+
+            assert_eq!(&frame.data(0)[0..4], &[1, 2, 1, 2]);
+        }
+
+        #[test]
+        fn wrap_planar_frame_max_channels() {
+            let info = AudioInfo::new_raw(Sample::U8(Type::Planar), 2, 4);
+
+            let input = &[1, 2, 3, 4, 1, 2, 3, 4];
+            let frame = info.wrap_frame_with_max_channels(input, 2);
+
+            assert_eq!(frame.planes(), 2);
+            assert_eq!(&frame.data(0)[0..2], &[1, 1]);
+            assert_eq!(&frame.data(1)[0..2], &[2, 2]);
+        }
+    }
+}
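The tests above exercise wrap_frame and wrap_frame_with_max_channels but not the "ensure with_max_channels creates new struct" bullet. A sketch of what such a check could look like inside the same audio_info test module (hypothetical, not part of this commit; it assumes `channels` remains a plain field readable from the tests module, as the diff suggests):

        #[test]
        fn with_max_channels_returns_clamped_copy() {
            let info = AudioInfo::new_raw(Sample::U8(Type::Packed), 2, 4);

            // The copy comes back with its channel count clamped...
            let clamped = info.with_max_channels(2);
            assert_eq!(clamped.channels, 2);

            // ...while the original AudioInfo is left untouched.
            assert_eq!(info.channels, 4);
        }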

crates/recording/examples/recording-cli.rs

Lines changed: 7 additions & 11 deletions
@@ -1,6 +1,7 @@
-use cap_recording::{screen_capture::ScreenCaptureTarget, *};
+use cap_recording::{feeds::*, screen_capture::ScreenCaptureTarget, *};
+use kameo::Actor as _;
 use scap_targets::Display;
-use std::time::Duration;
+use std::{sync::Arc, time::Duration};
 use tracing::*;
 
 #[tokio::main]
@@ -42,31 +43,26 @@ pub async fn main() {
 
     // mic_feed
     //     .ask(microphone::SetInput {
-    //         label:
-    //             // MicrophoneFeed::list()
-    //             //     .into_iter()
-    //             //     .find(|(k, _)| k.contains("Focusrite"))
-    //             MicrophoneFeed::default()
-    //                 .map(|v| v.0)
-    //                 .unwrap(),
+    //         label: MicrophoneFeed::default_device().map(|v| v.0).unwrap(),
     //     })
     //     .await
     //     .unwrap()
     //     .await
     //     .unwrap();
 
-    tokio::time::sleep(Duration::from_millis(10)).await;
+    // tokio::time::sleep(Duration::from_millis(10)).await;
 
     let handle = instant_recording::Actor::builder(
         dir.path().into(),
         ScreenCaptureTarget::Display {
             id: Display::primary().id(),
         },
     )
-    // .with_system_audio(true)
+    .with_system_audio(true)
     // .with_camera_feed(std::sync::Arc::new(
     //     camera_feed.ask(feeds::camera::Lock).await.unwrap(),
     // ))
+    // .with_mic_feed(Arc::new(mic_feed.ask(microphone::Lock).await.unwrap()))
     .build(
         #[cfg(target_os = "macos")]
         cidre::sc::ShareableContent::current().await.unwrap(),
