From 51f0eed165c4e8893a9df450c5d5aea75d36383d Mon Sep 17 00:00:00 2001 From: Monty Montgomery Date: Thu, 14 May 2020 03:14:27 -0400 Subject: [PATCH] Fix for #2212; Crash when using 4 tiles for 1080p 4:2:2 input When doing loop filter RDO inline with the rest of the tile coding, LRUs must align to tile boundaries. An unexpected corner case means that chroma LRUs must have an even superblock width in 4:2:2 video, as LRUs must always be square. As a result, that means tiles must also have an even superblock width. As tile width must be adjusted in this case, it also means we can't use the spec's 'tile uniform spacing' mode, which would produce odd superblock width tiles in, eg, 1080p 4:2:2 video. This patch also implements explicit per-tile sizing in the frame OBU header. --- src/encoder.rs | 3 +- src/header.rs | 77 ++++++++++++++++++++++++++++++++++++--------- src/tiling/tiler.rs | 38 +++++++++++++++++++++- 3 files changed, 101 insertions(+), 17 deletions(-) diff --git a/src/encoder.rs b/src/encoder.rs index b4b0c8dd9c..b60163d88f 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -604,6 +604,7 @@ impl FrameInvariants { frame_rate, TilingInfo::tile_log2(1, config.tile_cols).unwrap(), TilingInfo::tile_log2(1, config.tile_rows).unwrap(), + sequence.chroma_sampling == ChromaSampling::Cs422, ); if config.tiles > 0 { @@ -619,6 +620,7 @@ impl FrameInvariants { frame_rate, tile_cols_log2, tile_rows_log2, + sequence.chroma_sampling == ChromaSampling::Cs422, ); if tiling.rows * tiling.cols >= config.tiles { @@ -2621,7 +2623,6 @@ fn encode_partition_topdown( let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef }; cw.write_partition(w, tile_bo, partition, bsize); } - match partition { PartitionType::PARTITION_NONE => { let part_decision = if !rdo_output.part_modes.is_empty() { diff --git a/src/header.rs b/src/header.rs index 3677c47e9f..102dccf1fb 100644 --- a/src/header.rs +++ b/src/header.rs @@ -12,6 +12,8 @@ use crate::context::*; use crate::ec::*; use 
crate::lrf::*; use crate::partition::*; +use crate::tiling::MAX_TILE_WIDTH; +use crate::util::Fixed; use crate::util::Pixel; use crate::DeblockState; @@ -670,25 +672,70 @@ impl UncompressedHeader for BitWriter { self.write_bit(fi.disable_frame_end_update_cdf)?; } - // tile - self.write_bit(true)?; // uniform_tile_spacing_flag + // tile + // + // Can we use the uniform spacing tile syntax? 'Uniform spacing' + // is a slight misnomer; it's more constrained than just a uniform + // spacing. let ti = &fi.tiling; - let cols_ones = ti.tile_cols_log2 - ti.min_tile_cols_log2; - for _ in 0..cols_ones { - self.write_bit(true); - } - if ti.tile_cols_log2 < ti.max_tile_cols_log2 { - self.write_bit(false); - } + if fi.sb_width.align_power_of_two_and_shift(ti.tile_cols_log2) + == ti.tile_width_sb + && fi.sb_height.align_power_of_two_and_shift(ti.tile_rows_log2) + == ti.tile_height_sb + { + // yes; our actual tile width/height setting (which is always + // currently uniform) also matches the constrained width/height + // calculation implicit in the uniform spacing flag. 
- let rows_ones = ti.tile_rows_log2 - ti.min_tile_rows_log2; - for _ in 0..rows_ones { - self.write_bit(true); - } - if ti.tile_rows_log2 < ti.max_tile_rows_log2 { - self.write_bit(false); + self.write_bit(true)?; // uniform_tile_spacing_flag + + let cols_ones = ti.tile_cols_log2 - ti.min_tile_cols_log2; + for _ in 0..cols_ones { + self.write_bit(true); + } + if ti.tile_cols_log2 < ti.max_tile_cols_log2 { + self.write_bit(false); + } + + let rows_ones = ti.tile_rows_log2 - ti.min_tile_rows_log2; + for _ in 0..rows_ones { + self.write_bit(true); + } + if ti.tile_rows_log2 < ti.max_tile_rows_log2 { + self.write_bit(false); + } + } else { + self.write_bit(false)?; // uniform_tile_spacing_flag + let mut sofar = 0; + let mut widest_tile_sb = 0; + for _ in 0..ti.cols { + let max = (MAX_TILE_WIDTH + >> if fi.sequence.use_128x128_superblock { 7 } else { 6 }) + .min(fi.sb_width - sofar) as u16; + let this_sb_width = ti.tile_width_sb.min(fi.sb_width - sofar); + self.write_quniform(max, (this_sb_width - 1) as u16); + sofar += this_sb_width; + widest_tile_sb = widest_tile_sb.max(this_sb_width); + } + + let max_tile_area_sb = if ti.min_tiles_log2 > 0 { + (fi.sb_height * fi.sb_width) >> (ti.min_tiles_log2 + 1) + } else { + fi.sb_height * fi.sb_width + }; + + let max_tile_height_sb = (max_tile_area_sb / widest_tile_sb).max(1); + + sofar = 0; + for i in 0..ti.rows { + let max = max_tile_height_sb.min(fi.sb_height - sofar) as u16; + let this_sb_height = ti.tile_height_sb.min(fi.sb_height - sofar); + + self.write_quniform(max, (this_sb_height - 1) as u16); + sofar += this_sb_height; + } } let tiles_log2 = ti.tile_cols_log2 + ti.tile_rows_log2; diff --git a/src/tiling/tiler.rs b/src/tiling/tiler.rs index 2dab411db2..daa3a5e9be 100644 --- a/src/tiling/tiler.rs +++ b/src/tiling/tiler.rs @@ -44,12 +44,14 @@ pub struct TilingInfo { pub min_tile_rows_log2: usize, pub max_tile_rows_log2: usize, pub sb_size_log2: usize, + pub min_tiles_log2: usize, } impl TilingInfo { pub fn 
from_target_tiles( sb_size_log2: usize, frame_width: usize, frame_height: usize, frame_rate: f64, tile_cols_log2: usize, tile_rows_log2: usize, + is_422_p: bool, ) -> Self { // @@ -87,7 +89,26 @@ impl TilingInfo { let tile_cols_log2 = tile_cols_log2.max(min_tile_cols_log2).min(max_tile_cols_log2); - let tile_width_sb = sb_cols.align_power_of_two_and_shift(tile_cols_log2); + let tile_width_sb_pre = + sb_cols.align_power_of_two_and_shift(tile_cols_log2); + + // If this is 4:2:2, our UV horizontal is subsampled but not our + // vertical. Loop Restoration Units must be square, so they + // will always have an even number of horizontal superblocks. For + // tiles and LRUs to align, tile_width_sb must be even in 4:2:2 + // video. + + // This is only relevant when doing loop restoration RDO inline + // with block/superblock encoding, that is, where tiles are + // relevant. If (when) we introduce optionally delaying loop-filter + // encode to after the partitioning loop, we won't need to make + // any 4:2:2 adjustment. 
+ + let tile_width_sb = if is_422_p { + (tile_width_sb_pre + 1) >> 1 << 1 + } else { + tile_width_sb_pre + }; let min_tile_rows_log2 = if min_tiles_log2 > tile_cols_log2 { min_tiles_log2 - tile_cols_log2 @@ -123,6 +144,7 @@ impl TilingInfo { min_tile_rows_log2, max_tile_rows_log2, sb_size_log2, + min_tiles_log2, } } @@ -240,6 +262,7 @@ pub mod test { frame_rate, 0, 0, + false, ); assert_eq!(1, ti.cols); assert_eq!(1, ti.rows); @@ -253,6 +276,7 @@ pub mod test { frame_rate, 1, 1, + false, ); assert_eq!(2, ti.cols); assert_eq!(2, ti.rows); @@ -266,6 +290,7 @@ pub mod test { frame_rate, 2, 2, + false, ); assert_eq!(3, ti.cols); assert_eq!(3, ti.rows); @@ -280,6 +305,7 @@ pub mod test { frame_rate, 10, 8, + false, ); assert_eq!(3, ti.cols); assert_eq!(3, ti.rows); @@ -293,6 +319,7 @@ pub mod test { frame_rate, 0, 0, + false, ); assert_eq!(1, ti.cols); assert_eq!(1, ti.rows); @@ -336,6 +363,7 @@ pub mod test { frame_rate, 1, 1, + false, ); let mut iter = ti.tile_iter_mut(&mut fs, &mut fb); assert_eq!(4, iter.len()); @@ -359,6 +387,7 @@ pub mod test { frame_rate, 2, 2, + false, ); let mut iter = ti.tile_iter_mut(&mut fs, &mut fb); assert_eq!(9, iter.len()); @@ -406,6 +435,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let tile_states = iter.map(|ctx| ctx.ts).collect::>(); @@ -484,6 +514,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let tbs = iter.map(|ctx| ctx.tb).collect::>(); @@ -524,6 +555,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let mut tile_states = iter.map(|ctx| ctx.ts).collect::>(); @@ -588,6 +620,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let mut tile_states = iter.map(|ctx| ctx.ts).collect::>(); @@ -628,6 +661,7 @@ pub mod test { fi.config.frame_rate(), 1, 1, + false, ); let iter = ti.tile_iter_mut(&mut fs, 
&mut fb); let mut tile_states = iter.map(|ctx| ctx.ts).collect::>(); @@ -690,6 +724,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let mut tile_states = iter.map(|ctx| ctx.ts).collect::>(); @@ -734,6 +769,7 @@ pub mod test { fi.config.frame_rate(), 2, 2, + false, ); let iter = ti.tile_iter_mut(&mut fs, &mut fb); let mut tbs = iter.map(|ctx| ctx.tb).collect::>();