From 131428ab62bc95020d661ed4fb984caae3557dbf Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Thu, 28 Nov 2024 10:27:52 +0100 Subject: [PATCH] Add initial benchmarking setup. --- Cargo.lock | 1 + .../src/pipeline/graphics_context.rs | 2 +- integration_tests/Cargo.toml | 1 + integration_tests/src/bin/benchmark/args.rs | 202 ++++++++++++ integration_tests/src/bin/benchmark/main.rs | 302 ++++++++++++++++++ vk-video/src/lib.rs | 4 +- vk-video/src/vulkan_decoder/vulkan_ctx.rs | 6 +- 7 files changed, 512 insertions(+), 6 deletions(-) create mode 100644 integration_tests/src/bin/benchmark/args.rs create mode 100644 integration_tests/src/bin/benchmark/main.rs diff --git a/Cargo.lock b/Cargo.lock index b78821882..99229849c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2396,6 +2396,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bytes", + "clap", "compositor_api", "compositor_chromium", "compositor_pipeline", diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs index ea30eb26f..587ba62d1 100644 --- a/compositor_pipeline/src/pipeline/graphics_context.rs +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -9,7 +9,7 @@ pub struct VulkanCtx { pub instance: Arc, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GraphicsContext { pub device: Arc, pub queue: Arc, diff --git a/integration_tests/Cargo.toml b/integration_tests/Cargo.toml index 07831f1b8..be2c85f49 100644 --- a/integration_tests/Cargo.toml +++ b/integration_tests/Cargo.toml @@ -17,6 +17,7 @@ web_renderer = [ ] [dependencies] +clap = { version = "4.5", features = ["derive"] } live_compositor = { workspace = true } compositor_api = { workspace = true } compositor_pipeline = { workspace = true } diff --git a/integration_tests/src/bin/benchmark/args.rs b/integration_tests/src/bin/benchmark/args.rs new file mode 100644 index 000000000..f0f5920a2 --- /dev/null +++ b/integration_tests/src/bin/benchmark/args.rs @@ -0,0 +1,202 @@ +use std::{path::PathBuf, time::Duration}; + +use compositor_pipeline::pipeline::{self, encoder::ffmpeg_h264}; + +#[derive(Debug, Clone, Copy)] +pub enum Argument { + IterateExp, + Maximize, + Constant(u64), +} + +impl Argument { + pub fn as_constant(&self) -> Option { + if let Self::Constant(v) = self { + Some(*v) + } else { + None + } + } +} + +impl std::str::FromStr for Argument { + type Err = String; + + fn from_str(s: &str) -> Result { + if s == "iterate_exp" { + return Ok(Argument::IterateExp); + } + + if s == "maximize" { + return Ok(Argument::Maximize); + } + + s.parse::() + .map(Argument::Constant) + .map_err(|e| format!("{e}")) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct DurationWrapper(pub Duration); + +impl std::str::FromStr for DurationWrapper { + type Err = std::num::ParseFloatError; + + fn from_str(s: &str) -> Result { + s.parse::() + .map(|f| DurationWrapper(Duration::from_secs_f64(f))) + } +} + +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +#[clap(rename_all = "snake_case")] +pub enum VideoDecoder { + FfmpegH264, + #[cfg(not(target_os = "macos"))] + VulkanVideoH264, +} + +impl From for pipeline::VideoDecoder { + fn from(value: VideoDecoder) -> Self { + match value { + VideoDecoder::FfmpegH264 => pipeline::VideoDecoder::FFmpegH264, + #[cfg(not(target_os = "macos"))] + VideoDecoder::VulkanVideoH264 => pipeline::VideoDecoder::VulkanVideoH264, + } + } +} + +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +#[clap(rename_all = "snake_case")] +pub enum EncoderPreset { + Ultrafast, + Superfast, + Veryfast, + Faster, + Fast, + Medium, + Slow, + Slower, + Veryslow, + Placebo, +} + +impl From for ffmpeg_h264::EncoderPreset { + fn from(value: EncoderPreset) -> Self { + match value { + EncoderPreset::Ultrafast => ffmpeg_h264::EncoderPreset::Ultrafast, + EncoderPreset::Superfast => ffmpeg_h264::EncoderPreset::Superfast, + EncoderPreset::Veryfast => ffmpeg_h264::EncoderPreset::Veryfast, + EncoderPreset::Faster => ffmpeg_h264::EncoderPreset::Faster, + EncoderPreset::Fast => ffmpeg_h264::EncoderPreset::Fast, + EncoderPreset::Medium => ffmpeg_h264::EncoderPreset::Medium, + EncoderPreset::Slow => ffmpeg_h264::EncoderPreset::Slow, + EncoderPreset::Slower => ffmpeg_h264::EncoderPreset::Slower, + EncoderPreset::Veryslow => ffmpeg_h264::EncoderPreset::Veryslow, + EncoderPreset::Placebo => ffmpeg_h264::EncoderPreset::Placebo, + } + } +} + +/// Only one option can be set to "maximize" +#[derive(Debug, Clone, clap::Parser)] +pub struct Args { + /// [possible values: iterate_exp, maximize or a number] + #[arg(long)] + pub framerate: Argument, + + /// [possible values: iterate_exp, maximize or a number] + #[arg(long)] + pub decoder_count: Argument, + + #[arg(long)] + pub file_path: PathBuf, + + #[arg(long)] + pub output_width: u32, + + #[arg(long)] + pub output_height: u32, + + #[arg(long)] + pub encoder_preset: EncoderPreset, + + /// warm-up time in seconds + #[arg(long)] + pub warm_up_time: DurationWrapper, + + /// measuring time in seconds + #[arg(long)] + pub measured_time: DurationWrapper, + + #[arg(long)] + pub video_decoder: VideoDecoder, + + /// in the end of the benchmark the framerate achieved by the compositor is multiplied by this + /// number, before comparing to the target framerate + #[arg(long)] + pub framerate_tolerance: f64, +} + +impl Args { + pub fn arguments(&self) -> Box<[Argument]> { + vec![self.framerate, self.decoder_count].into_boxed_slice() + } + + pub fn with_arguments(&self, arguments: &[Argument]) -> SingleBenchConfig { + SingleBenchConfig { + framerate: arguments[0].as_constant().unwrap(), + decoder_count: arguments[1].as_constant().unwrap(), + + file_path: self.file_path.clone(), + output_width: self.output_width, + output_height: self.output_height, + warm_up_time: self.warm_up_time.0, + measured_time: self.measured_time.0, + video_decoder: self.video_decoder.into(), + output_encoder_preset: self.encoder_preset.into(), + framerate_tolerance_multiplier: self.framerate_tolerance, + } + } +} + +pub struct SingleBenchConfig { + pub decoder_count: u64, + pub framerate: u64, + pub file_path: PathBuf, + pub output_width: u32, + pub output_height: u32, + pub output_encoder_preset: ffmpeg_h264::EncoderPreset, + pub warm_up_time: Duration, + pub measured_time: Duration, + pub video_decoder: pipeline::VideoDecoder, + pub framerate_tolerance_multiplier: f64, +} + +impl SingleBenchConfig { + pub fn log_running_config(&self) { + tracing::info!( + "checking configuration: framerate: {}, decoder count: {}", + self.framerate, + self.decoder_count + ); + } + + pub fn log_as_report(&self) { + print!("{}\t", self.decoder_count); + print!("{}\t", self.framerate); + print!("{}\t", self.output_width); + print!("{}\t", self.output_height); + print!("{:?}\t", self.output_encoder_preset); + print!("{:?}\t", self.warm_up_time); + print!("{:?}\t", self.measured_time); + print!("{:?}\t", self.video_decoder); + print!("{}\t", self.framerate_tolerance_multiplier); + println!(); + } + + pub fn log_report_header() { + println!("dec cnt\tfps\twidth\theight\tpreset\twarmup\tmeasured\tdec\ttol") + } +} diff --git a/integration_tests/src/bin/benchmark/main.rs b/integration_tests/src/bin/benchmark/main.rs new file mode 100644 index 000000000..d39b39b93 --- /dev/null +++ b/integration_tests/src/bin/benchmark/main.rs @@ -0,0 +1,302 @@ +use std::{ + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; + +use clap::Parser; +use compositor_pipeline::{ + pipeline::{ + encoder::VideoEncoderOptions, + input::{ + mp4::{Mp4Options, Source}, + InputOptions, + }, + output::EncodedDataOutputOptions, + GraphicsContext, Options, OutputVideoOptions, PipelineOutputEndCondition, + RegisterInputOptions, RegisterOutputOptions, + }, + queue::{self, QueueInputOptions, QueueOptions}, + Pipeline, +}; + +use compositor_pipeline::pipeline::encoder::ffmpeg_h264::Options as H264OutputOptions; +use compositor_render::{ + scene::{ + Component, HorizontalAlign, InputStreamComponent, RGBAColor, TilesComponent, VerticalAlign, + }, + web_renderer::WebRendererInitOptions, + Framerate, InputId, OutputId, Resolution, +}; +use live_compositor::{ + config::{read_config, LoggerConfig}, + logger, +}; +use tracing::warn; + +mod args; + +use args::{Args, Argument, SingleBenchConfig}; + +fn main() { + let args = Args::parse(); + let config = read_config(); + ffmpeg_next::format::network::init(); + let logger_config = LoggerConfig { + level: "compositor_pipeline=error,vk-video=info,benchmark=info".into(), + ..config.logger + }; + logger::init_logger(logger_config); + + let ctx = GraphicsContext::new( + false, + wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + Default::default(), + None, + ) + .unwrap(); + + if cfg!(debug_assertions) { + warn!("This benchmark is running in debug mode. Make sure to run in release mode for reliable results."); + } + + let reports = run_args(ctx, &args); + SingleBenchConfig::log_report_header(); + for report in reports { + report.log_as_report(); + } +} + +fn run_args(ctx: GraphicsContext, args: &Args) -> Vec { + let arguments = args.arguments(); + let mut reports = Vec::new(); + + // check maximize count + let maximize_count = arguments + .iter() + .filter(|arg| matches!(arg, Argument::Maximize)) + .count(); + + if maximize_count > 1 { + panic!("Only one argument can be set to 'maximize'"); + } + + run_args_iterate(ctx, args, arguments, &mut reports); + + reports +} + +fn run_args_iterate( + ctx: GraphicsContext, + args: &Args, + arguments: Box<[Argument]>, + reports: &mut Vec, +) -> bool { + for (i, argument) in arguments.iter().enumerate() { + if matches!(argument, Argument::IterateExp) { + let mut any_succeeded = false; + let mut count = 1; + loop { + let mut arguments = arguments.clone(); + arguments[i] = Argument::Constant(count); + + if run_args_iterate(ctx.clone(), args, arguments, reports) { + any_succeeded = true; + count *= 2; + continue; + } else { + return any_succeeded; + } + } + } + } + + run_args_maximize(ctx, args, arguments, reports) +} + +fn run_args_maximize( + ctx: GraphicsContext, + args: &Args, + arguments: Box<[Argument]>, + reports: &mut Vec, +) -> bool { + for (i, argument) in arguments.iter().enumerate() { + if matches!(argument, Argument::Maximize) { + let upper_bound = find_upper_bound(1, |count| { + let mut arguments = arguments.clone(); + arguments[i] = Argument::Constant(count); + let config = args.with_arguments(&arguments); + config.log_running_config(); + run_single_test(ctx.clone(), config) + }); + + if upper_bound == 0 { + return false; + } + + let result = binsearch(upper_bound / 2, upper_bound, |count| { + let mut arguments = arguments.clone(); + arguments[i] = Argument::Constant(count); + let config = args.with_arguments(&arguments); + config.log_running_config(); + run_single_test(ctx.clone(), config) + }); + + let mut arguments = arguments.clone(); + arguments[i] = Argument::Constant(result); + reports.push(args.with_arguments(&arguments)); + return true; + } + } + + // if we got here, there is no maximize, so just run a single test + let config = args.with_arguments(&arguments); + run_single_test(ctx, config) +} + +fn binsearch(mut start: u64, mut end: u64, test_fn: impl Fn(u64) -> bool) -> u64 { + while start < end { + let midpoint = (start + end + 1) / 2; + + if test_fn(midpoint) { + start = midpoint; + } else { + end = midpoint - 1; + } + } + + end +} + +fn find_upper_bound(start: u64, test_fn: impl Fn(u64) -> bool) -> u64 { + let mut end = start; + + while test_fn(end) { + end *= 2; + } + + end - 1 +} + +/// true - works +/// false - too slow +fn run_single_test(ctx: GraphicsContext, bench_config: SingleBenchConfig) -> bool { + let (pipeline, _event_loop) = Pipeline::new(Options { + queue_options: QueueOptions { + never_drop_output_frames: true, + output_framerate: Framerate { + num: bench_config.framerate as u32, + den: 1, + }, + default_buffer_duration: queue::DEFAULT_BUFFER_DURATION, + ahead_of_time_processing: false, + run_late_scheduled_events: true, + }, + web_renderer: WebRendererInitOptions { + enable: false, + enable_gpu: false, + }, + wgpu_ctx: Some(ctx), + force_gpu: false, + download_root: std::env::temp_dir(), + wgpu_features: wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + load_system_fonts: Some(false), + output_sample_rate: 48_000, + stream_fallback_timeout: Duration::from_millis(500), + tokio_rt: None, + stun_servers: Vec::new().into(), + }) + .unwrap(); + let pipeline = Arc::new(Mutex::new(pipeline)); + + let mut inputs = Vec::new(); + for i in 0..bench_config.decoder_count { + let input_id = InputId(format!("input_{i}").into()); + inputs.push(input_id.clone()); + + Pipeline::register_input( + &pipeline, + input_id, + RegisterInputOptions { + input_options: InputOptions::Mp4(Mp4Options { + should_loop: true, + video_decoder: bench_config.video_decoder, + source: Source::File(bench_config.file_path.clone()), + }), + queue_options: QueueInputOptions { + offset: Some(Duration::ZERO), + required: true, + buffer_duration: None, + }, + }, + ) + .unwrap(); + } + + let output_id = OutputId("output".into()); + let receiver = Pipeline::register_encoded_data_output( + &pipeline, + output_id, + RegisterOutputOptions { + video: Some(OutputVideoOptions { + end_condition: PipelineOutputEndCondition::AnyInput, + initial: Component::Tiles(TilesComponent { + id: None, + width: Some(bench_config.output_width as f32), + height: Some(bench_config.output_height as f32), + margin: 2.0, + padding: 0.0, + children: inputs + .into_iter() + .map(|i| { + Component::InputStream(InputStreamComponent { + id: None, + input_id: i, + }) + }) + .collect(), + transition: None, + vertical_align: VerticalAlign::Center, + horizontal_align: HorizontalAlign::Center, + background_color: RGBAColor(128, 128, 128, 0), + tile_aspect_ratio: (16, 9), + }), + }), + + audio: None, + output_options: EncodedDataOutputOptions { + audio: None, + video: Some(VideoEncoderOptions::H264(H264OutputOptions { + preset: bench_config.output_encoder_preset, + resolution: Resolution { + width: bench_config.output_width as usize, + height: bench_config.output_height as usize, + }, + raw_options: Vec::new(), + })), + }, + }, + ) + .unwrap(); + + Pipeline::start(&pipeline); + + let start_time = Instant::now(); + while Instant::now() - start_time < bench_config.warm_up_time { + _ = receiver.recv().unwrap(); + } + + let start_time = Instant::now(); + let mut produced_frames: usize = 0; + while Instant::now() - start_time < bench_config.measured_time { + _ = receiver.recv().unwrap(); + produced_frames += 1; + } + + let end_time = Instant::now(); + + let framerate = produced_frames as f64 / (end_time - start_time).as_secs_f64(); + + framerate * bench_config.framerate_tolerance_multiplier > bench_config.framerate as f64 +} diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs index 5948eed65..bdf426821 100644 --- a/vk-video/src/lib.rs +++ b/vk-video/src/lib.rs @@ -22,13 +22,13 @@ pub struct Frame { pub pts: Option, } -pub struct WgpuTexturesDeocder<'a> { +pub struct WgpuTexturesDecoder<'a> { vulkan_decoder: VulkanDecoder<'a>, parser: Parser, frame_sorter: FrameSorter, } -impl WgpuTexturesDeocder<'_> { +impl WgpuTexturesDecoder<'_> { // TODO: the below hasn't been verified. /// The produced textures have the [`wgpu::TextureFormat::NV12`] format and can be used as a copy source or a texture binding. pub fn decode( diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs index daf9761e2..87dcdbf40 100644 --- a/vk-video/src/vulkan_decoder/vulkan_ctx.rs +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -7,7 +7,7 @@ use ash::{vk, Entry}; use tracing::{debug, error, warn}; use wgpu::hal::Adapter; -use crate::{parser::Parser, BytesDecoder, DecoderError, WgpuTexturesDeocder}; +use crate::{parser::Parser, BytesDecoder, DecoderError, WgpuTexturesDecoder}; use super::{ Allocator, CommandBuffer, CommandPool, DebugMessenger, Device, FrameSorter, Instance, @@ -339,12 +339,12 @@ pub struct VulkanDevice { impl VulkanDevice { pub fn create_wgpu_textures_decoder( self: &Arc, - ) -> Result { + ) -> Result { let parser = Parser::default(); let vulkan_decoder = VulkanDecoder::new(self.clone())?; let frame_sorter = FrameSorter::::new(); - Ok(WgpuTexturesDeocder { + Ok(WgpuTexturesDecoder { parser, vulkan_decoder, frame_sorter,