From c01c3828545ffd6e87d1a27c19bc8fb0a4698bfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 11 Sep 2024 17:16:31 +0400 Subject: [PATCH 01/11] feat(server): warn if encoding takes >10ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- crates/ironrdp-server/src/lib.rs | 24 ++++++++++++++++++++++++ crates/ironrdp-server/src/server.rs | 4 ++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/crates/ironrdp-server/src/lib.rs b/crates/ironrdp-server/src/lib.rs index b48685e7c..be9f0e2d8 100644 --- a/crates/ironrdp-server/src/lib.rs +++ b/crates/ironrdp-server/src/lib.rs @@ -28,3 +28,27 @@ pub use handler::*; pub use helper::*; pub use server::*; pub use sound::*; + +#[macro_export] +macro_rules! time_warn { + ($context:expr, $threshold_ms:expr, $op:expr) => {{ + #[cold] + fn warn_log(context: &str, duration: u128) { + use std::sync::atomic::AtomicUsize; + + static COUNT: AtomicUsize = AtomicUsize::new(0); + let current_count = COUNT.fetch_add(1, ::std::sync::atomic::Ordering::Relaxed); + if current_count < 50 || current_count % 100 == 0 { + ::tracing::warn!("{context} took {duration} ms! 
(count: {current_count})"); + } + } + + let start = std::time::Instant::now(); + let result = $op; + let duration = start.elapsed().as_millis(); + if duration > $threshold_ms { + warn_log($context, duration); + } + result + }}; +} diff --git a/crates/ironrdp-server/src/server.rs b/crates/ironrdp-server/src/server.rs index 6af9e6260..939ed50c5 100644 --- a/crates/ironrdp-server/src/server.rs +++ b/crates/ironrdp-server/src/server.rs @@ -32,7 +32,7 @@ use crate::clipboard::CliprdrServerFactory; use crate::display::{DisplayUpdate, RdpServerDisplay}; use crate::encoder::UpdateEncoder; use crate::handler::RdpServerInputHandler; -use crate::{builder, capabilities, SoundServerFactory}; +use crate::{builder, capabilities, time_warn, SoundServerFactory}; #[derive(Clone)] pub struct RdpServerOptions { @@ -417,7 +417,7 @@ impl RdpServer { let mut fragmenter = match update { DisplayUpdate::Bitmap(bitmap) => { let (enc, res) = task::spawn_blocking(move || { - let res = encoder.bitmap(bitmap).map(|r| r.into_owned()); + let res = time_warn!("Encoding bitmap", 10, encoder.bitmap(bitmap).map(|r| r.into_owned())); (encoder, res) }) .await?; From 1b039484097476c2f87f3beb63b35c66d46a2b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 31 Oct 2024 12:28:01 +0400 Subject: [PATCH 02/11] refactor(server): factor out remotefx tile encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- crates/ironrdp-server/src/encoder/rfx.rs | 175 ++++++++++++++++------- 1 file changed, 123 insertions(+), 52 deletions(-) diff --git a/crates/ironrdp-server/src/encoder/rfx.rs b/crates/ironrdp-server/src/encoder/rfx.rs index 14863b7e8..55c67a526 100644 --- a/crates/ironrdp-server/src/encoder/rfx.rs +++ b/crates/ironrdp-server/src/encoder/rfx.rs @@ -1,6 +1,7 @@ use ironrdp_core::{cast_length, other_err, EncodeResult}; use ironrdp_graphics::color_conversion::to_64x64_ycbcr_tile; use 
ironrdp_graphics::rfx_encode_component; +use ironrdp_graphics::rlgr::RlgrError; use ironrdp_pdu::codecs::rfx::{self, OperatingMode, RfxChannel, RfxChannelHeight, RfxChannelWidth}; use ironrdp_pdu::rdp::capability_sets::EntropyBits; use ironrdp_pdu::PduBufferParsing; @@ -59,58 +60,8 @@ impl RfxEncoder { let region = rfx::RegionPdu { rectangles }; let quant = rfx::Quant::default(); - let bpp = usize::from(bitmap.format.bytes_per_pixel()); - let width = usize::from(bitmap.width.get()); - let height = usize::from(bitmap.height.get()); - - let tiles_x = (width + 63) / 64; - let tiles_y = (height + 63) / 64; - let ntiles = tiles_x * tiles_y; - let mut tiles = Vec::with_capacity(ntiles); - let mut data = vec![0u8; 64 * 64 * 3 * ntiles]; - let mut rest = data.as_mut_slice(); - - for tile_y in 0..tiles_y { - for tile_x in 0..tiles_x { - let x = tile_x * 64; - let y = tile_y * 64; - let tile_width = std::cmp::min(width - x, 64); - let tile_height = std::cmp::min(height - y, 64); - - let input = &bitmap.data[y * bitmap.stride + x * bpp..]; - - let y = &mut [0i16; 4096]; - let cb = &mut [0i16; 4096]; - let cr = &mut [0i16; 4096]; - to_64x64_ycbcr_tile(input, tile_width, tile_height, bitmap.stride, bitmap.format, y, cb, cr); - - let (y_data, new_rest) = rest.split_at_mut(4096); - let (cb_data, new_rest) = new_rest.split_at_mut(4096); - let (cr_data, new_rest) = new_rest.split_at_mut(4096); - rest = new_rest; - let len = rfx_encode_component(y, y_data, &quant, entropy_algorithm) - .map_err(|e| other_err!("rfxenc", source: e))?; - let y_data = &y_data[..len]; - let len = rfx_encode_component(cb, cb_data, &quant, entropy_algorithm) - .map_err(|e| other_err!("rfxenc", source: e))?; - let cb_data = &cb_data[..len]; - let len = rfx_encode_component(cr, cr_data, &quant, entropy_algorithm) - .map_err(|e| other_err!("rfxenc", source: e))?; - let cr_data = &cr_data[..len]; - - let tile = rfx::Tile { - y_quant_index: 0, - cb_quant_index: 0, - cr_quant_index: 0, - x: 
u16::try_from(tile_x).unwrap(), - y: u16::try_from(tile_y).unwrap(), - y_data, - cb_data, - cr_data, - }; - tiles.push(tile); - } - } + let (encoder, mut data) = UpdateEncoder::new(bitmap, quant.clone(), entropy_algorithm); + let tiles = encoder.encode(&mut data)?; let quants = vec![quant]; let tile_set = rfx::TileSetPdu { @@ -148,3 +99,123 @@ impl RfxEncoder { ) } } + +struct UpdateEncoder<'a> { + bitmap: &'a BitmapUpdate, + quant: rfx::Quant, + entropy_algorithm: rfx::EntropyAlgorithm, +} + +struct UpdateEncoderData(Vec); + +struct EncodedTile<'a> { + y_data: &'a [u8], + cb_data: &'a [u8], + cr_data: &'a [u8], +} + +impl<'a> UpdateEncoder<'a> { + fn new( + bitmap: &'a BitmapUpdate, + quant: rfx::Quant, + entropy_algorithm: rfx::EntropyAlgorithm, + ) -> (Self, UpdateEncoderData) { + let this = Self { + bitmap, + quant, + entropy_algorithm, + }; + let data = this.alloc_data(); + + (this, data) + } + + fn alloc_data(&self) -> UpdateEncoderData { + let (tiles_x, tiles_y) = self.tiles_xy(); + + UpdateEncoderData(vec![0u8; 64 * 64 * 3 * tiles_x * tiles_y]) + } + + fn tiles_xy(&self) -> (usize, usize) { + ( + self.bitmap.width.get().div_ceil(64).into(), + self.bitmap.height.get().div_ceil(64).into(), + ) + } + + fn encode(&self, data: &'a mut UpdateEncoderData) -> EncodeResult>> { + let (tiles_x, tiles_y) = self.tiles_xy(); + + let chunks = data.0.chunks_mut(64 * 64 * 3); + let tiles: Vec<_> = (0..tiles_y).flat_map(|y| (0..tiles_x).map(move |x| (x, y))).collect(); + + chunks + .zip(tiles) + .map(|(buf, (tile_x, tile_y))| { + let EncodedTile { + y_data, + cb_data, + cr_data, + } = self + .encode_tile(tile_x, tile_y, buf) + .map_err(|e| other_err!("rfxenc", source: e))?; + + let tile = rfx::Tile { + y_quant_index: 0, + cb_quant_index: 0, + cr_quant_index: 0, + x: u16::try_from(tile_x).unwrap(), + y: u16::try_from(tile_y).unwrap(), + y_data, + cb_data, + cr_data, + }; + Ok(tile) + }) + .collect() + } + + fn encode_tile<'b>(&self, tile_x: usize, tile_y: usize, buf: &'b mut 
[u8]) -> Result, RlgrError> { + assert!(buf.len() >= 4096 * 3); + + let bpp: usize = self.bitmap.format.bytes_per_pixel().into(); + let width: usize = self.bitmap.width.get().into(); + let height: usize = self.bitmap.height.get().into(); + + let x = tile_x * 64; + let y = tile_y * 64; + let tile_width = std::cmp::min(width - x, 64); + let tile_height = std::cmp::min(height - y, 64); + let input = &self.bitmap.data[y * self.bitmap.stride + x * bpp..]; + + let y = &mut [0i16; 4096]; + let cb = &mut [0i16; 4096]; + let cr = &mut [0i16; 4096]; + to_64x64_ycbcr_tile( + input, + tile_width, + tile_height, + self.bitmap.stride, + self.bitmap.format, + y, + cb, + cr, + ); + + let (y_data, buf) = buf.split_at_mut(4096); + let (cb_data, cr_data) = buf.split_at_mut(4096); + + let len = rfx_encode_component(y, y_data, &self.quant, self.entropy_algorithm)?; + let y_data = &y_data[..len]; + let len = rfx_encode_component(cb, cb_data, &self.quant, self.entropy_algorithm)?; + let cb_data = &cb_data[..len]; + let len = rfx_encode_component(cr, cr_data, &self.quant, self.entropy_algorithm)?; + let cr_data = &cr_data[..len]; + + Ok(EncodedTile { + y_data, + cb_data, + cr_data, + }) + } +} From fd1fd39e2ef0794ace21cd73a72780480fdb58a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 5 Nov 2024 11:37:32 +0400 Subject: [PATCH 03/11] feat(bench): benchmark the remotefx encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- Cargo.lock | 203 +++++++++++++++++++++++ Cargo.toml | 1 + crates/ironrdp-bench/Cargo.toml | 25 +++ crates/ironrdp-bench/benches/bench.rs | 43 +++++ crates/ironrdp-server/Cargo.toml | 4 + crates/ironrdp-server/src/encoder/mod.rs | 2 +- crates/ironrdp-server/src/encoder/rfx.rs | 25 ++- crates/ironrdp-server/src/lib.rs | 9 + 8 files changed, 310 insertions(+), 2 deletions(-) create mode 100644 crates/ironrdp-bench/Cargo.toml create mode 100644 
crates/ironrdp-bench/benches/bench.rs diff --git a/Cargo.lock b/Cargo.lock index f9534a67c..ecaa19128 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,6 +163,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.15" @@ -588,6 +594,12 @@ dependencies = [ "wayland-client", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cbc" version = "0.1.2" @@ -649,6 +661,33 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -895,6 +934,61 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + 
"regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.20" @@ -926,6 +1020,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto" version = "0.5.1" @@ -1742,6 +1842,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.15.0" @@ -2086,6 +2196,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "ironrdp-bench" +version = "0.0.0" +dependencies = [ + "criterion", + "ironrdp-pdu", + "ironrdp-server", +] + [[package]] name = "ironrdp-blocking" version = "0.1.0" @@ -2485,12 +2604,32 @@ dependencies = [ "x509-cert", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi 0.4.0", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -3232,6 +3371,12 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -3569,6 +3714,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + 
"plotters-backend", +] + [[package]] name = "png" version = "0.17.14" @@ -3781,6 +3954,26 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rc2" version = "0.8.1" @@ -4731,6 +4924,16 @@ version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ab95735ea2c8fd51154d01e39cf13912a78071c2d89abc49a7ef102a7dd725a" +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.8.0" diff --git a/Cargo.toml b/Cargo.toml index d0f0b7688..8068c3a8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ categories = ["network-programming"] ironrdp-acceptor = { version = "0.1", path = "crates/ironrdp-acceptor" } ironrdp-ainput = { version = "0.1", path = "crates/ironrdp-ainput" } ironrdp-async = { version = "0.1", path = "crates/ironrdp-async" } +ironrdp-bench = { version = "0.1", path = "crates/ironrdp-bench" } ironrdp-blocking = { version = "0.1", path = "crates/ironrdp-blocking" } ironrdp-cliprdr = { version = "0.1", path = "crates/ironrdp-cliprdr" } ironrdp-cliprdr-native = { version = "0.1", path = "crates/ironrdp-cliprdr-native" } diff --git a/crates/ironrdp-bench/Cargo.toml 
b/crates/ironrdp-bench/Cargo.toml new file mode 100644 index 000000000..950170ced --- /dev/null +++ b/crates/ironrdp-bench/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "ironrdp-bench" +version = "0.0.0" +description = "IronRDP benchmarks" +edition.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true +authors.workspace = true +keywords.workspace = true +categories.workspace = true +publish = false + +[dev-dependencies] +criterion = "0.5" +ironrdp-pdu.workspace = true +ironrdp-server = { workspace = true, features = ["__bench"] } + +[[bench]] +name = "bench" +path = "benches/bench.rs" +harness = false + +[lints] +workspace = true diff --git a/crates/ironrdp-bench/benches/bench.rs b/crates/ironrdp-bench/benches/bench.rs new file mode 100644 index 000000000..8ffe63312 --- /dev/null +++ b/crates/ironrdp-bench/benches/bench.rs @@ -0,0 +1,43 @@ +use std::num::NonZero; + +use criterion::{criterion_group, criterion_main, Criterion}; +use ironrdp_pdu::codecs::rfx; +use ironrdp_server::{ + bench::encoder::rfx::{rfx_enc, rfx_enc_tile}, + BitmapUpdate, +}; + +pub fn rfx_enc_tile_bench(c: &mut Criterion) { + let quant = rfx::Quant::default(); + let algo = rfx::EntropyAlgorithm::Rlgr3; + let bitmap = BitmapUpdate { + top: 0, + left: 0, + width: NonZero::new(64).unwrap(), + height: NonZero::new(64).unwrap(), + format: ironrdp_server::PixelFormat::ARgb32, + data: vec![0; 64 * 64 * 4], + order: ironrdp_server::PixelOrder::BottomToTop, + stride: 64 * 4, + }; + c.bench_function("rfx_enc_tile", |b| b.iter(|| rfx_enc_tile(&bitmap, &quant, algo, 0, 0))); +} + +pub fn rfx_enc_bench(c: &mut Criterion) { + let quant = rfx::Quant::default(); + let algo = rfx::EntropyAlgorithm::Rlgr3; + let bitmap = BitmapUpdate { + top: 0, + left: 0, + width: NonZero::new(2048).unwrap(), + height: NonZero::new(2048).unwrap(), + format: ironrdp_server::PixelFormat::ARgb32, + data: vec![0; 2048 * 2048 * 4], + order: ironrdp_server::PixelOrder::BottomToTop, + 
stride: 64 * 4, + }; + c.bench_function("rfx_enc", |b| b.iter(|| rfx_enc(&bitmap, &quant, algo))); +} + +criterion_group!(benches, rfx_enc_tile_bench, rfx_enc_bench); +criterion_main!(benches); diff --git a/crates/ironrdp-server/Cargo.toml b/crates/ironrdp-server/Cargo.toml index b13766fae..0750f926f 100644 --- a/crates/ironrdp-server/Cargo.toml +++ b/crates/ironrdp-server/Cargo.toml @@ -18,6 +18,10 @@ test = false [features] helper = ["dep:x509-cert", "dep:rustls-pemfile"] +# Internal (PRIVATE!) features used to aid testing. +# Don't rely on these whatsoever. They may disappear at any time. +__bench = [] + [dependencies] anyhow = "1.0" tokio = { version = "1", features = ["net", "macros", "sync", "rt"] } diff --git a/crates/ironrdp-server/src/encoder/mod.rs b/crates/ironrdp-server/src/encoder/mod.rs index 0ee99272d..c736da0b9 100644 --- a/crates/ironrdp-server/src/encoder/mod.rs +++ b/crates/ironrdp-server/src/encoder/mod.rs @@ -1,4 +1,4 @@ -pub(crate) mod bitmap; +mod bitmap; pub(crate) mod rfx; use std::{cmp, mem}; diff --git a/crates/ironrdp-server/src/encoder/rfx.rs b/crates/ironrdp-server/src/encoder/rfx.rs index 55c67a526..2292b2cf9 100644 --- a/crates/ironrdp-server/src/encoder/rfx.rs +++ b/crates/ironrdp-server/src/encoder/rfx.rs @@ -100,7 +100,7 @@ impl RfxEncoder { } } -struct UpdateEncoder<'a> { +pub(crate) struct UpdateEncoder<'a> { bitmap: &'a BitmapUpdate, quant: rfx::Quant, entropy_algorithm: rfx::EntropyAlgorithm, @@ -219,3 +219,26 @@ impl<'a> UpdateEncoder<'a> { }) } } + +#[cfg(feature = "__bench")] +pub(crate) mod bench { + use super::*; + + pub fn rfx_enc_tile( + bitmap: &BitmapUpdate, + quant: &rfx::Quant, + algo: rfx::EntropyAlgorithm, + tile_x: usize, + tile_y: usize, + ) { + let (enc, mut data) = UpdateEncoder::new(bitmap, quant.clone(), algo); + + enc.encode_tile(tile_x, tile_y, &mut data.0).unwrap(); + } + + pub fn rfx_enc(bitmap: &BitmapUpdate, quant: &rfx::Quant, algo: rfx::EntropyAlgorithm) { + let (enc, mut data) = 
UpdateEncoder::new(bitmap, quant.clone(), algo); + + enc.encode(&mut data).unwrap(); + } +} diff --git a/crates/ironrdp-server/src/lib.rs b/crates/ironrdp-server/src/lib.rs index be9f0e2d8..8ac7869dd 100644 --- a/crates/ironrdp-server/src/lib.rs +++ b/crates/ironrdp-server/src/lib.rs @@ -29,6 +29,15 @@ pub use helper::*; pub use server::*; pub use sound::*; +#[cfg(feature = "__bench")] +pub mod bench { + pub mod encoder { + pub mod rfx { + pub use crate::encoder::rfx::bench::{rfx_enc, rfx_enc_tile}; + } + } +} + #[macro_export] macro_rules! time_warn { ($context:expr, $threshold_ms:expr, $op:expr) => {{ From 5aee8e6f71793a24b4b81282864ffbea47b8f1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 4 Nov 2024 12:40:41 +0400 Subject: [PATCH 04/11] feat(bench): benchmark rgb2yuv tile encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau --- Cargo.lock | 1 + crates/ironrdp-bench/Cargo.toml | 1 + crates/ironrdp-bench/benches/bench.rs | 17 ++++++++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index ecaa19128..167acc4e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2201,6 +2201,7 @@ name = "ironrdp-bench" version = "0.0.0" dependencies = [ "criterion", + "ironrdp-graphics", "ironrdp-pdu", "ironrdp-server", ] diff --git a/crates/ironrdp-bench/Cargo.toml b/crates/ironrdp-bench/Cargo.toml index 950170ced..7a2253c64 100644 --- a/crates/ironrdp-bench/Cargo.toml +++ b/crates/ironrdp-bench/Cargo.toml @@ -13,6 +13,7 @@ publish = false [dev-dependencies] criterion = "0.5" +ironrdp-graphics.workspace = true ironrdp-pdu.workspace = true ironrdp-server = { workspace = true, features = ["__bench"] } diff --git a/crates/ironrdp-bench/benches/bench.rs b/crates/ironrdp-bench/benches/bench.rs index 8ffe63312..63f1fa7a9 100644 --- a/crates/ironrdp-bench/benches/bench.rs +++ b/crates/ironrdp-bench/benches/bench.rs @@ -1,6 +1,7 @@ use 
std::num::NonZero; use criterion::{criterion_group, criterion_main, Criterion}; +use ironrdp_graphics::color_conversion::to_64x64_ycbcr_tile; use ironrdp_pdu::codecs::rfx; use ironrdp_server::{ bench::encoder::rfx::{rfx_enc, rfx_enc_tile}, @@ -39,5 +40,19 @@ pub fn rfx_enc_bench(c: &mut Criterion) { c.bench_function("rfx_enc", |b| b.iter(|| rfx_enc(&bitmap, &quant, algo))); } -criterion_group!(benches, rfx_enc_tile_bench, rfx_enc_bench); +pub fn to_ycbcr_bench(c: &mut Criterion) { + const WIDTH: usize = 64; + const HEIGHT: usize = 64; + let input = vec![0; WIDTH * HEIGHT * 4]; + let stride = WIDTH * 4; + let mut y = [0i16; WIDTH * HEIGHT]; + let mut cb = [0i16; WIDTH * HEIGHT]; + let mut cr = [0i16; WIDTH * HEIGHT]; + let format = ironrdp_graphics::image_processing::PixelFormat::ARgb32; + c.bench_function("to_ycbcr", |b| { + b.iter(|| to_64x64_ycbcr_tile(&input, WIDTH, HEIGHT, stride, format, &mut y, &mut cb, &mut cr)) + }); +} + +criterion_group!(benches, rfx_enc_tile_bench, rfx_enc_bench, to_ycbcr_bench); criterion_main!(benches); From d185b13aeeac786a0d59201b04e788415acd77b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 4 Nov 2024 16:00:19 +0400 Subject: [PATCH 05/11] refactor(graphics): use fixed-size slices in to_64x64_ycbcr_tile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In theory, this could help the compiler unroll loops. That doesn't seem to be the case, but it at least allows dropping the assert_eq!() checks.
Signed-off-by: Marc-André Lureau --- crates/ironrdp-graphics/src/color_conversion.rs | 9 +++------ .../tests/graphics/color_conversion.rs | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/ironrdp-graphics/src/color_conversion.rs b/crates/ironrdp-graphics/src/color_conversion.rs index 28bd8323b..675b18b51 100644 --- a/crates/ironrdp-graphics/src/color_conversion.rs +++ b/crates/ironrdp-graphics/src/color_conversion.rs @@ -135,15 +135,12 @@ pub fn to_64x64_ycbcr_tile( height: usize, stride: usize, format: PixelFormat, - y: &mut [i16], - cb: &mut [i16], - cr: &mut [i16], + y: &mut [i16; 64 * 64], + cb: &mut [i16; 64 * 64], + cr: &mut [i16; 64 * 64], ) { assert!(width <= 64); assert!(height <= 64); - assert_eq!(y.len(), 64 * 64); - assert_eq!(cb.len(), 64 * 64); - assert_eq!(cr.len(), 64 * 64); let to_rgb = pixel_format_to_rgb_fn(format); let bpp = format.bytes_per_pixel() as usize; diff --git a/crates/ironrdp-testsuite-core/tests/graphics/color_conversion.rs b/crates/ironrdp-testsuite-core/tests/graphics/color_conversion.rs index ad369feea..a601949f2 100644 --- a/crates/ironrdp-testsuite-core/tests/graphics/color_conversion.rs +++ b/crates/ironrdp-testsuite-core/tests/graphics/color_conversion.rs @@ -4,9 +4,9 @@ use ironrdp_graphics::{color_conversion::*, image_processing::PixelFormat}; fn to_64x64_ycbcr() { let input = [0u8; 4]; - let mut y = vec![0; 4096]; - let mut cb = vec![0; 4096]; - let mut cr = vec![0; 4096]; + let mut y = [0; 64 * 64]; + let mut cb = [0; 64 * 64]; + let mut cr = [0; 64 * 64]; to_64x64_ycbcr_tile(&input, 1, 1, 4, PixelFormat::ABgr32, &mut y, &mut cb, &mut cr); } From 92fe43740da916ea9a99040e79f9b21bad6e1eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 1 Nov 2024 13:22:03 +0400 Subject: [PATCH 06/11] perf(graphics): use const generics for DWT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That seems to speed up the code a bit: rfxenc time:
[46.040 µs 46.288 µs 46.698 µs] change: [-9.2580% -8.6663% -7.8304%] (p = 0.00 < 0.05) Performance has improved. Signed-off-by: Marc-André Lureau --- crates/ironrdp-graphics/src/dwt.rs | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/crates/ironrdp-graphics/src/dwt.rs b/crates/ironrdp-graphics/src/dwt.rs index 2f545054b..6381cb542 100644 --- a/crates/ironrdp-graphics/src/dwt.rs +++ b/crates/ironrdp-graphics/src/dwt.rs @@ -1,30 +1,30 @@ use ironrdp_pdu::utils::SplitTo as _; pub fn encode(buffer: &mut [i16], temp_buffer: &mut [i16]) { - encode_block(&mut *buffer, temp_buffer, 32); - encode_block(&mut buffer[3072..], temp_buffer, 16); - encode_block(&mut buffer[3840..], temp_buffer, 8); + encode_block::<32>(&mut *buffer, temp_buffer); + encode_block::<16>(&mut buffer[3072..], temp_buffer); + encode_block::<8>(&mut buffer[3840..], temp_buffer); } -fn encode_block(buffer: &mut [i16], temp_buffer: &mut [i16], subband_width: usize) { - dwt_vertical(buffer, temp_buffer, subband_width); - dwt_horizontal(buffer, temp_buffer, subband_width); +fn encode_block(buffer: &mut [i16], temp_buffer: &mut [i16]) { + dwt_vertical::(buffer, temp_buffer); + dwt_horizontal::(buffer, temp_buffer); } // DWT in vertical direction, results in 2 sub-bands in L, H order in tmp buffer dwt. 
-fn dwt_vertical(buffer: &[i16], dwt: &mut [i16], subband_width: usize) { - let total_width = subband_width * 2; +fn dwt_vertical(buffer: &[i16], dwt: &mut [i16]) { + let total_width = SUBBAND_WIDTH * 2; for x in 0..total_width { - for n in 0..subband_width { + for n in 0..SUBBAND_WIDTH { let y = n * 2; let l_index = n * total_width + x; - let h_index = l_index + subband_width * total_width; + let h_index = l_index + SUBBAND_WIDTH * total_width; let src_index = y * total_width + x; dwt[h_index] = ((i32::from(buffer[src_index + total_width]) - ((i32::from(buffer[src_index]) - + i32::from(buffer[src_index + if n < subband_width - 1 { 2 * total_width } else { 0 }])) + + i32::from(buffer[src_index + if n < SUBBAND_WIDTH - 1 { 2 * total_width } else { 0 }])) >> 1)) >> 1) as i16; dwt[l_index] = (i32::from(buffer[src_index]) @@ -41,9 +41,9 @@ fn dwt_vertical(buffer: &[i16], dwt: &mut [i16], subband_width: usize) { // LL(3) order, stored in original buffer. // The lower part L generates LL(3) and HL(0). // The higher part H generates LH(1) and HH(2). 
-fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) { - let total_width = subband_width * 2; - let squared_subband_width = subband_width.pow(2); +fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16]) { + let total_width = SUBBAND_WIDTH * 2; + let squared_subband_width = SUBBAND_WIDTH.pow(2); let mut hl = buffer.split_to(squared_subband_width); let mut lh = buffer.split_to(squared_subband_width); @@ -51,14 +51,14 @@ fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) { let mut ll = buffer; let (mut l_src, mut h_src) = dwt.split_at(squared_subband_width * 2); - for _ in 0..subband_width { + for _ in 0..SUBBAND_WIDTH { // L - for n in 0..subband_width { + for n in 0..SUBBAND_WIDTH { let x = n * 2; // HL hl[n] = ((i32::from(l_src[x + 1]) - - ((i32::from(l_src[x]) + i32::from(l_src[if n < subband_width - 1 { x + 2 } else { x }])) >> 1)) + - ((i32::from(l_src[x]) + i32::from(l_src[if n < SUBBAND_WIDTH - 1 { x + 2 } else { x }])) >> 1)) >> 1) as i16; // LL ll[n] = (i32::from(l_src[x]) @@ -70,12 +70,12 @@ fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) { } // H - for n in 0..subband_width { + for n in 0..SUBBAND_WIDTH { let x = n * 2; // HH hh[n] = ((i32::from(h_src[x + 1]) - - ((i32::from(h_src[x]) + i32::from(h_src[if n < subband_width - 1 { x + 2 } else { x }])) >> 1)) + - ((i32::from(h_src[x]) + i32::from(h_src[if n < SUBBAND_WIDTH - 1 { x + 2 } else { x }])) >> 1)) >> 1) as i16; // LH lh[n] = (i32::from(h_src[x]) @@ -86,10 +86,10 @@ fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) { }) as i16; } - hl = &mut hl[subband_width..]; - lh = &mut lh[subband_width..]; - hh = &mut hh[subband_width..]; - ll = &mut ll[subband_width..]; + hl = &mut hl[SUBBAND_WIDTH..]; + lh = &mut lh[SUBBAND_WIDTH..]; + hh = &mut hh[SUBBAND_WIDTH..]; + ll = &mut ll[SUBBAND_WIDTH..]; l_src = &l_src[total_width..]; h_src = &h_src[total_width..]; From 93dc066ddb4c676d74eaaeaa4c33e1bf4b6809d3 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 1 Nov 2024 14:51:26 +0400 Subject: [PATCH 07/11] refactor(graphics): const pixel_format_to_rgb_fn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That doesn't change the speed though, code isn't inlined afaict. Signed-off-by: Marc-André Lureau --- .../ironrdp-graphics/src/color_conversion.rs | 58 ++++++++++++------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/crates/ironrdp-graphics/src/color_conversion.rs b/crates/ironrdp-graphics/src/color_conversion.rs index 675b18b51..6a3d16ecb 100644 --- a/crates/ironrdp-graphics/src/color_conversion.rs +++ b/crates/ironrdp-graphics/src/color_conversion.rs @@ -29,28 +29,44 @@ where } } -fn pixel_format_to_rgb_fn(format: PixelFormat) -> fn(&[u8]) -> Rgb { +fn xrgb_to_rgb(pixel: &[u8]) -> Rgb { + Rgb { + r: pixel[1], + g: pixel[2], + b: pixel[3], + } +} + +fn xbgr_to_rgb(pixel: &[u8]) -> Rgb { + Rgb { + b: pixel[1], + g: pixel[2], + r: pixel[3], + } +} + +fn bgrx_to_rgb(pixel: &[u8]) -> Rgb { + Rgb { + b: pixel[0], + g: pixel[1], + r: pixel[2], + } +} + +fn rgbx_to_rgb(pixel: &[u8]) -> Rgb { + Rgb { + r: pixel[0], + g: pixel[1], + b: pixel[2], + } +} + +const fn pixel_format_to_rgb_fn(format: PixelFormat) -> fn(&[u8]) -> Rgb { match format { - PixelFormat::ARgb32 | PixelFormat::XRgb32 => |pixel: &[u8]| Rgb { - r: pixel[1], - g: pixel[2], - b: pixel[3], - }, - PixelFormat::ABgr32 | PixelFormat::XBgr32 => |pixel: &[u8]| Rgb { - b: pixel[1], - g: pixel[2], - r: pixel[3], - }, - PixelFormat::BgrA32 | PixelFormat::BgrX32 => |pixel: &[u8]| Rgb { - b: pixel[0], - g: pixel[1], - r: pixel[2], - }, - PixelFormat::RgbA32 | PixelFormat::RgbX32 => |pixel: &[u8]| Rgb { - r: pixel[0], - g: pixel[1], - b: pixel[2], - }, + PixelFormat::ARgb32 | PixelFormat::XRgb32 => xrgb_to_rgb, + PixelFormat::ABgr32 | PixelFormat::XBgr32 => xbgr_to_rgb, + PixelFormat::BgrA32 | PixelFormat::BgrX32 => bgrx_to_rgb, + 
PixelFormat::RgbA32 | PixelFormat::RgbX32 => rgbx_to_rgb, } } From 7e923b8915c6be50d5f4b21a3fe5c5c4a330ef2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Tue, 5 Nov 2024 11:44:11 +0400 Subject: [PATCH 08/11] perf(server): make tiles encoding parallel with rayon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can reduce wall-clock time a lot, but the gain depends on the CPU. rfx_enc time: [9.7885 ms 10.123 ms 10.439 ms] change: [-80.484% -79.847% -79.208%] (p = 0.00 < 0.05) Performance has improved. Signed-off-by: Marc-André Lureau --- Cargo.lock | 1 + crates/ironrdp-server/Cargo.toml | 3 +++ crates/ironrdp-server/src/encoder/rfx.rs | 7 +++++++ 3 files changed, 11 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 167acc4e5..f82b4678f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2484,6 +2484,7 @@ dependencies = [ "ironrdp-rdpsnd", "ironrdp-svc", "ironrdp-tokio", + "rayon", "rustls-pemfile", "tokio", "tokio-rustls", diff --git a/crates/ironrdp-server/Cargo.toml b/crates/ironrdp-server/Cargo.toml index 0750f926f..2bc3fb23a 100644 --- a/crates/ironrdp-server/Cargo.toml +++ b/crates/ironrdp-server/Cargo.toml @@ -16,7 +16,9 @@ doctest = true test = false [features] +default = ["rayon"] helper = ["dep:x509-cert", "dep:rustls-pemfile"] +rayon = ["dep:rayon"] # Internal (PRIVATE!) features used to aid testing. # Don't rely on these whatsoever. They may disappear at any time.
 @@ -42,6 +44,7 @@ ironrdp-rdpsnd.workspace = true tracing.workspace = true x509-cert = { version = "0.2.5", optional = true } rustls-pemfile = { version = "2.2.0", optional = true } +rayon = { version = "1.10.0", optional = true } [dev-dependencies] tokio = { version = "1", features = ["sync"] } diff --git a/crates/ironrdp-server/src/encoder/rfx.rs b/crates/ironrdp-server/src/encoder/rfx.rs index 2292b2cf9..6324f9e9c 100644 --- a/crates/ironrdp-server/src/encoder/rfx.rs +++ b/crates/ironrdp-server/src/encoder/rfx.rs @@ -144,9 +144,16 @@ impl<'a> UpdateEncoder<'a> { } fn encode(&self, data: &'a mut UpdateEncoderData) -> EncodeResult>> { + #[cfg(feature = "rayon")] + use rayon::prelude::*; + let (tiles_x, tiles_y) = self.tiles_xy(); + #[cfg(not(feature = "rayon"))] let chunks = data.0.chunks_mut(64 * 64 * 3); + #[cfg(feature = "rayon")] + let chunks = data.0.par_chunks_mut(64 * 64 * 3); + let tiles: Vec<_> = (0..tiles_y).flat_map(|y| (0..tiles_x).map(move |x| (x, y))).collect(); chunks From bfcf7d07e01b77ff0e46b750659e9d6995bfd2e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Sat, 2 Nov 2024 15:08:55 +0400 Subject: [PATCH 09/11] refactor(graphics): make sure Rust uses const YUV matrix values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently it already did: I do not observe any performance improvement. 
Signed-off-by: Marc-André Lureau --- .../ironrdp-graphics/src/color_conversion.rs | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/crates/ironrdp-graphics/src/color_conversion.rs b/crates/ironrdp-graphics/src/color_conversion.rs index 6a3d16ecb..7173414aa 100644 --- a/crates/ironrdp-graphics/src/color_conversion.rs +++ b/crates/ironrdp-graphics/src/color_conversion.rs @@ -254,24 +254,33 @@ impl From for YCbCr { // terms need to be scaled by << 5 we simply scale the final // sum by >> 10 const DIVISOR: f32 = (1 << 15) as f32; + const Y_R: i32 = (0.299 * DIVISOR) as i32; + const Y_G: i32 = (0.587 * DIVISOR) as i32; + const Y_B: i32 = (0.114 * DIVISOR) as i32; + const CB_R: i32 = (0.168_935 * DIVISOR) as i32; + const CB_G: i32 = (0.331_665 * DIVISOR) as i32; + const CB_B: i32 = (0.500_59 * DIVISOR) as i32; + const CR_R: i32 = (0.499_813 * DIVISOR) as i32; + const CR_G: i32 = (0.418_531 * DIVISOR) as i32; + const CR_B: i32 = (0.081_282 * DIVISOR) as i32; let r = i32::from(r); let g = i32::from(g); let b = i32::from(b); - let y_r = r.overflowing_mul((0.299 * DIVISOR) as i32).0; - let y_g = g.overflowing_mul((0.587 * DIVISOR) as i32).0; - let y_b = b.overflowing_mul((0.114 * DIVISOR) as i32).0; + let y_r = r.overflowing_mul(Y_R).0; + let y_g = g.overflowing_mul(Y_G).0; + let y_b = b.overflowing_mul(Y_B).0; let y = y_r.overflowing_add(y_g).0.overflowing_add(y_b).0 >> 10; - let cb_r = r.overflowing_mul((0.168_935 * DIVISOR) as i32).0; - let cb_g = g.overflowing_mul((0.331_665 * DIVISOR) as i32).0; - let cb_b = b.overflowing_mul((0.500_59 * DIVISOR) as i32).0; + let cb_r = r.overflowing_mul(CB_R).0; + let cb_g = g.overflowing_mul(CB_G).0; + let cb_b = b.overflowing_mul(CB_B).0; let cb = cb_b.overflowing_sub(cb_g).0.overflowing_sub(cb_r).0 >> 10; - let cr_r = r.overflowing_mul((0.499_813 * DIVISOR) as i32).0; - let cr_g = g.overflowing_mul((0.418_531 * DIVISOR) as i32).0; - let cr_b = b.overflowing_mul((0.081_282 * DIVISOR) as i32).0; + 
let cr_r = r.overflowing_mul(CR_R).0; + let cr_g = g.overflowing_mul(CR_G).0; + let cr_b = b.overflowing_mul(CR_B).0; let cr = cr_r.overflowing_sub(cr_g).0.overflowing_sub(cr_b).0 >> 10; Self { From d5a81a1b8c1639df32b2441b96e55fa26c0b78bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 4 Nov 2024 14:58:42 +0400 Subject: [PATCH 10/11] refactor(graphics): use an ExactSizeIterator for iter_to_ycbcr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately, that doesn't seem to help unrolling & vectorizing: no perf improvements. Signed-off-by: Marc-André Lureau --- crates/ironrdp-graphics/src/color_conversion.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/ironrdp-graphics/src/color_conversion.rs b/crates/ironrdp-graphics/src/color_conversion.rs index 7173414aa..be837e45e 100644 --- a/crates/ironrdp-graphics/src/color_conversion.rs +++ b/crates/ironrdp-graphics/src/color_conversion.rs @@ -17,7 +17,7 @@ pub fn ycbcr_to_bgra(input: YCbCrBuffer<'_>, mut output: &mut [u8]) -> io::Resul fn iter_to_ycbcr<'a, I, C>(input: I, y: &mut [i16], cb: &mut [i16], cr: &mut [i16], conv: C) where - I: IntoIterator, + I: ExactSizeIterator, C: Fn(&[u8]) -> Rgb, { for (i, pixel) in input.into_iter().enumerate() { @@ -144,6 +144,12 @@ impl<'a> Iterator for TileIterator<'a> { } } +impl ExactSizeIterator for TileIterator<'_> { + fn len(&self) -> usize { + 64 * 64 + } +} + #[allow(clippy::too_many_arguments)] pub fn to_64x64_ycbcr_tile( input: &[u8], From a5e7a8e689723177b743f1bf105b790c91ba9aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 4 Nov 2024 13:24:27 +0400 Subject: [PATCH 11/11] perf(graphics): help Rust to inline iter_to_ycbcr with format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rgb2yuv time: [11.706 µs 11.716 µs 11.727 µs] change: [-24.083% -23.682% -23.394%] (p = 0.00 < 0.05) 
Signed-off-by: Marc-André Lureau --- crates/ironrdp-graphics/src/color_conversion.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/ironrdp-graphics/src/color_conversion.rs b/crates/ironrdp-graphics/src/color_conversion.rs index be837e45e..4c1d4d081 100644 --- a/crates/ironrdp-graphics/src/color_conversion.rs +++ b/crates/ironrdp-graphics/src/color_conversion.rs @@ -164,11 +164,15 @@ pub fn to_64x64_ycbcr_tile( assert!(width <= 64); assert!(height <= 64); - let to_rgb = pixel_format_to_rgb_fn(format); let bpp = format.bytes_per_pixel() as usize; let input = TileIterator::new(input, width, height, stride, bpp); - iter_to_ycbcr(input, y, cb, cr, to_rgb); + match format { + PixelFormat::ARgb32 | PixelFormat::XRgb32 => iter_to_ycbcr(input, y, cb, cr, xrgb_to_rgb), + PixelFormat::ABgr32 | PixelFormat::XBgr32 => iter_to_ycbcr(input, y, cb, cr, xbgr_to_rgb), + PixelFormat::BgrA32 | PixelFormat::BgrX32 => iter_to_ycbcr(input, y, cb, cr, bgrx_to_rgb), + PixelFormat::RgbA32 | PixelFormat::RgbX32 => iter_to_ycbcr(input, y, cb, cr, rgbx_to_rgb), + }; } /// Convert a 16-bit RDP color to RGB representation. Input value should be represented in