Backport renaming of BLOCK_SIZE to FG_BLOCK_SIZE from dav1d 1.3.0 (#1256)

fbossen · web-flow · commit dac2b8291717 · 2024-06-27T18:28:58.000-04:00
Also consistently use the named constant instead of magic numbers (e.g. `(h + 31) >> 5` becomes `(h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE`).
diff --git a/src/arm/32/filmgrain.S b/src/arm/32/filmgrain.S
@@ -1481,17 +1481,17 @@ function fgy_32x32_8bpc_neon, export=1
         calc_offset     r6,  lr,  r6,  0,   0
         add_offset      r5,  r6,  lr,  r5,  r9
 
-        add             r4,  r4,  #32          // grain_lut += BLOCK_SIZE * bx
-        add             r6,  r11, r9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
+        add             r4,  r4,  #32          // grain_lut += FG_BLOCK_SIZE * bx
+        add             r6,  r11, r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
 
         ldr             r10, [sp, #120]        // type
         adr             r11, L(fgy_loop_tbl)
 
         tst             r10, #1
         ldr             r10, [r11, r10, lsl #2]
 
-        add             r8,  r8,  r9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r8,  r8,  #32          // grain_lut += BLOCK_SIZE * bx
+        add             r8,  r8,  r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r8,  r8,  #32          // grain_lut += FG_BLOCK_SIZE * bx
 
         add             r11, r11, r10
 
@@ -1695,10 +1695,10 @@ function fguv_32x32_\layout\()_8bpc_neon, export=1
         calc_offset     r8,  r12, r8,  \sx, \sy
         add_offset      r5,  r8,  r12, r5,  r10
 
-        add             r4,  r4,  #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
-        add             r8,  lr,  r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r11, r11, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
+        add             r4,  r4,  #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
+        add             r8,  lr,  r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r11, r11, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
 
         movrel_local    r12, overlap_coeffs_\sx
         ldr             lr,  [sp, #132]        // type
diff --git a/src/arm/32/filmgrain16.S b/src/arm/32/filmgrain16.S
@@ -1353,17 +1353,17 @@ function fgy_32x32_16bpc_neon, export=1
         calc_offset     r6,  lr,  r6,  0,   0
         add_offset      r5,  r6,  lr,  r5,  r9
 
-        add             r4,  r4,  #32*2        // grain_lut += BLOCK_SIZE * bx
-        add             r6,  r11, r9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
+        add             r4,  r4,  #32*2        // grain_lut += FG_BLOCK_SIZE * bx
+        add             r6,  r11, r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
 
         ldr             r10, [sp, #120]        // type
         adr             r11, L(fgy_loop_tbl)
 
         tst             r10, #1
         ldr             r10, [r11, r10, lsl #2]
 
-        add             r8,  r8,  r9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r8,  r8,  #32*2        // grain_lut += BLOCK_SIZE * bx
+        add             r8,  r8,  r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r8,  r8,  #32*2        // grain_lut += FG_BLOCK_SIZE * bx
 
         add             r11, r11, r10
 
@@ -1651,10 +1651,10 @@ function fguv_32x32_\layout\()_16bpc_neon, export=1
 
         vmov.16         d31[3], r7             // overlap y [1]
 
-        add             r4,  r4,  #2*(32 >> \sx)      // grain_lut += BLOCK_SIZE * bx
-        add             r8,  lr,  r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             r11, r11, #2*(32 >> \sx)      // grain_lut += BLOCK_SIZE * bx
+        add             r4,  r4,  #2*(32 >> \sx)      // grain_lut += FG_BLOCK_SIZE * bx
+        add             r8,  lr,  r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             r11, r11, #2*(32 >> \sx)      // grain_lut += FG_BLOCK_SIZE * bx
 
         movrel_local    r12, overlap_coeffs_\sx
         ldr             lr,       [sp, #132]   // type
diff --git a/src/arm/64/filmgrain.S b/src/arm/64/filmgrain.S
@@ -1409,14 +1409,14 @@ function fgy_32x32_8bpc_neon, export=1
         ldr             w11, [sp, #24]         // type
         adr             x13, L(fgy_loop_tbl)
 
-        add             x4,  x12, #32          // grain_lut += BLOCK_SIZE * bx
-        add             x6,  x14, x9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
+        add             x4,  x12, #32          // grain_lut += FG_BLOCK_SIZE * bx
+        add             x6,  x14, x9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
 
         tst             w11, #1
         ldrh            w11, [x13, w11, uxtw #1]
 
-        add             x8,  x16, x9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x8,  x8,  #32          // grain_lut += BLOCK_SIZE * bx
+        add             x8,  x16, x9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x8,  x8,  #32          // grain_lut += FG_BLOCK_SIZE * bx
 
         sub             x11, x13, w11, uxtw
 
@@ -1638,10 +1638,10 @@ function fguv_32x32_\layout\()_8bpc_neon, export=1
         add_offset      x17, w16, x17, x5,  x10
         add_offset      x5,  w8,  x11, x5,  x10
 
-        add             x4,  x13, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
-        add             x8,  x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x11, x11, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
+        add             x4,  x13, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
+        add             x8,  x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x11, x11, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
 
         ldr             w13, [sp, #64]         // type
 
diff --git a/src/arm/64/filmgrain16.S b/src/arm/64/filmgrain16.S
@@ -1308,14 +1308,14 @@ function fgy_32x32_16bpc_neon, export=1
         ldr             w11, [sp, #88]         // type
         adr             x13, L(fgy_loop_tbl)
 
-        add             x4,  x12, #32*2        // grain_lut += BLOCK_SIZE * bx
-        add             x6,  x14, x9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
+        add             x4,  x12, #32*2        // grain_lut += FG_BLOCK_SIZE * bx
+        add             x6,  x14, x9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
 
         tst             w11, #1
         ldrh            w11, [x13, w11, uxtw #1]
 
-        add             x8,  x16, x9,  lsl #5  // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x8,  x8,  #32*2        // grain_lut += BLOCK_SIZE * bx
+        add             x8,  x16, x9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x8,  x8,  #32*2        // grain_lut += FG_BLOCK_SIZE * bx
 
         sub             x11, x13, w11, uxtw
 
@@ -1581,10 +1581,10 @@ function fguv_32x32_\layout\()_16bpc_neon, export=1
         add_offset      x17, w16, x17, x5,  x10
         add_offset      x5,  w8,  x11, x5,  x10
 
-        add             x4,  x13, #2*(32 >> \sx)      // grain_lut += BLOCK_SIZE * bx
-        add             x8,  x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
-        add             x11, x11, #2*(32 >> \sx)      // grain_lut += BLOCK_SIZE * bx
+        add             x4,  x13, #2*(32 >> \sx)      // grain_lut += FG_BLOCK_SIZE * bx
+        add             x8,  x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
+        add             x11, x11, #2*(32 >> \sx)      // grain_lut += FG_BLOCK_SIZE * bx
 
         ldr             w13, [sp, #112]        // type
 
diff --git a/src/arm/filmgrain.h b/src/arm/filmgrain.h
@@ -91,8 +91,8 @@ static void fgy_32x32xn_neon(pixel *const dst_row, const pixel *const src_row,
 
     int offsets[2 /* col offset */][2 /* row offset */];
 
-    // process this row in BLOCK_SIZE^2 blocks
-    for (unsigned bx = 0; bx < pw; bx += BLOCK_SIZE) {
+    // process this row in FG_BLOCK_SIZE^2 blocks
+    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
 
         if (data->overlap_flag && bx) {
             // shift previous offsets left
@@ -155,8 +155,8 @@ fguv_32x32xn_##nm##_neon(pixel *const dst_row, const pixel *const src_row, \
  \
     int offsets[2 /* col offset */][2 /* row offset */]; \
  \
-    /* process this row in BLOCK_SIZE^2 blocks (subsampled) */ \
-    for (unsigned bx = 0; bx < pw; bx += BLOCK_SIZE >> sx) { \
+    /* process this row in FG_BLOCK_SIZE^2 blocks (subsampled) */ \
+    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { \
         if (data->overlap_flag && bx) { \
             /* shift previous offsets left */ \
             for (int i = 0; i < rows; i++) \
diff --git a/src/fg_apply.rs b/src/fg_apply.rs
@@ -7,7 +7,7 @@ use crate::include::dav1d::headers::Rav1dPixelLayout;
 use crate::include::dav1d::picture::Rav1dPicture;
 use crate::src::align::ArrayDefault;
 use crate::src::filmgrain::Rav1dFilmGrainDSPContext;
-use crate::src::filmgrain::BLOCK_SIZE;
+use crate::src::filmgrain::FG_BLOCK_SIZE;
 use crate::src::internal::GrainBD;
 use std::cmp;
 
@@ -146,7 +146,7 @@ pub(crate) fn rav1d_apply_grain_row<BD: BitDepth>(
     let bd = BD::from_c(bitdepth_max);
 
     if data.num_y_points != 0 {
-        let bh = cmp::min(h - row * BLOCK_SIZE, BLOCK_SIZE);
+        let bh = cmp::min(h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
         dsp.fgy_32x32xn.call(
             &out_data[0],
             &in_data[0],
@@ -164,12 +164,12 @@ pub(crate) fn rav1d_apply_grain_row<BD: BitDepth>(
         return;
     }
 
-    let bh = cmp::min(h - row * BLOCK_SIZE, BLOCK_SIZE) + ss_y >> ss_y;
+    let bh = cmp::min(h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y >> ss_y;
 
     // extend padding pixels
     if out.p.w as usize & ss_x != 0 {
         let luma = in_data[0].with_offset::<BD>();
-        let luma = luma + (row * BLOCK_SIZE) as isize * luma.pixel_stride::<BD>();
+        let luma = luma + (row * FG_BLOCK_SIZE) as isize * luma.pixel_stride::<BD>();
         for y in 0..bh {
             let luma = luma + (y as isize * (luma.pixel_stride::<BD>() << ss_y));
             let padding = &mut *(luma + (out.p.w as usize - 1)).slice_mut::<BD>(2);
@@ -225,7 +225,7 @@ pub(crate) fn rav1d_apply_grain<BD: BitDepth>(
     r#in: &Rav1dPicture,
 ) {
     let mut grain = Default::default();
-    let rows = out.p.h as usize + 31 >> 5;
+    let rows = (out.p.h as usize + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
 
     rav1d_prep_grain::<BD>(dsp, out, r#in, &mut grain);
     for row in 0..rows {
diff --git a/src/fg_apply_tmpl.c b/src/fg_apply_tmpl.c
@@ -173,14 +173,14 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
     const int cpw = (out->p.w + ss_x) >> ss_x;
     const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
     pixel *const luma_src =
-        ((pixel *) in->data[0]) + row * BLOCK_SIZE * PXSTRIDE(in->stride[0]);
+        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
 #if BITDEPTH != 8
     const int bitdepth_max = (1 << out->p.bpc) - 1;
 #endif
 
     if (data->num_y_points) {
-        const int bh = imin(out->p.h - row * BLOCK_SIZE, BLOCK_SIZE);
-        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * BLOCK_SIZE * PXSTRIDE(out->stride[0]),
+        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
+        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
                          luma_src, out->stride[0], data,
                          out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
     }
@@ -191,7 +191,7 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
         return;
     }
 
-    const int bh = (imin(out->p.h - row * BLOCK_SIZE, BLOCK_SIZE) + ss_y) >> ss_y;
+    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
 
     // extend padding pixels
     if (out->p.w & ss_x) {
@@ -202,7 +202,7 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
         }
     }
 
-    const ptrdiff_t uv_off = row * BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
+    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
     if (data->chroma_scaling_from_luma) {
         for (int pl = 0; pl < 2; pl++)
             dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
@@ -233,7 +233,7 @@ void bitfn(dav1d_apply_grain)(const Dav1dFilmGrainDSPContext *const dsp,
 #else
     uint8_t scaling[3][SCALING_SIZE];
 #endif
-    const int rows = (out->p.h + 31) >> 5;
+    const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
 
     bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
     for (int row = 0; row < rows; row++)
diff --git a/src/filmgrain.h b/src/filmgrain.h
@@ -34,7 +34,7 @@
 
 #define GRAIN_WIDTH 82
 #define GRAIN_HEIGHT 73
-#define BLOCK_SIZE 32
+#define FG_BLOCK_SIZE 32
 #if !defined(BITDEPTH) || BITDEPTH == 8
 #define SCALING_SIZE 256
 typedef int8_t entry;
diff --git a/src/filmgrain.rs b/src/filmgrain.rs
@@ -42,7 +42,7 @@ use crate::include::common::bitdepth::bd_fn;
 pub const GRAIN_WIDTH: usize = 82;
 pub const GRAIN_HEIGHT: usize = 73;
 
-pub const BLOCK_SIZE: usize = 32;
+pub const FG_BLOCK_SIZE: usize = 32;
 
 const SUB_GRAIN_WIDTH: usize = 44;
 const SUB_GRAIN_HEIGHT: usize = 38;
@@ -123,7 +123,7 @@ impl fgy_32x32xn::Fn {
         row_num: usize,
         bd: BD,
     ) {
-        let row_strides = (row_num * BLOCK_SIZE) as isize;
+        let row_strides = (row_num * FG_BLOCK_SIZE) as isize;
         let dst_row = dst.with_offset::<BD>() + row_strides * dst.pixel_stride::<BD>();
         let src_row = src.with_offset::<BD>() + row_strides * src.pixel_stride::<BD>();
         let dst_row_ptr = dst_row.as_mut_ptr::<BD>().cast();
@@ -195,7 +195,7 @@ impl fguv_32x32xn::Fn {
         bd: BD,
     ) {
         let ss_y = (layout == Rav1dPixelLayoutSubSampled::I420) as usize;
-        let row_strides = (row_num * BLOCK_SIZE) as isize;
+        let row_strides = (row_num * FG_BLOCK_SIZE) as isize;
         let dst_row = dst.with_offset::<BD>() + (row_strides * dst.pixel_stride::<BD>() >> ss_y);
         let src_row = src.with_offset::<BD>() + (row_strides * src.pixel_stride::<BD>() >> ss_y);
         let dst_row_ptr = dst_row.as_mut_ptr::<BD>().cast();
@@ -572,7 +572,7 @@ fn sample_lut<BD: BitDepth>(
     let randval = offsets[bx][by] as usize;
     let offx = 3 + (2 >> subx) * (3 + (randval >> 4));
     let offy = 3 + (2 >> suby) * (3 + (randval & ((1 << 4) - 1)));
-    grain_lut[offy + y + (BLOCK_SIZE >> suby) * by][offx + x + (BLOCK_SIZE >> subx) * bx]
+    grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by][offx + x + (FG_BLOCK_SIZE >> subx) * bx]
         .as_::<i32>()
 }
 
@@ -638,13 +638,13 @@ fn fgy_32x32xn_rust<BD: BitDepth>(
 
     let mut seed = row_seed(rows, row_num, data);
 
-    assert!(dst_row.stride() % (BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
+    assert!(dst_row.stride() % (FG_BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
 
     let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
 
-    // process this row in BLOCK_SIZE^2 blocks
-    for bx in (0..pw).step_by(BLOCK_SIZE) {
-        let bw = cmp::min(BLOCK_SIZE, pw - bx);
+    // process this row in FG_BLOCK_SIZE^2 blocks
+    for bx in (0..pw).step_by(FG_BLOCK_SIZE) {
+        let bw = cmp::min(FG_BLOCK_SIZE, pw - bx);
 
         if data.overlap_flag && bx != 0 {
             // shift previous offsets left
@@ -780,13 +780,13 @@ fn fguv_32x32xn_rust<BD: BitDepth>(
 
     let mut seed = row_seed(rows, row_num, data);
 
-    assert!(dst_row.stride() % (BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
+    assert!(dst_row.stride() % (FG_BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
 
     let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
 
-    // process this row in BLOCK_SIZE^2 blocks (subsampled)
-    for bx in (0..pw).step_by(BLOCK_SIZE >> sx) {
-        let bw = cmp::min(BLOCK_SIZE >> sx, pw - bx);
+    // process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
+    for bx in (0..pw).step_by(FG_BLOCK_SIZE >> sx) {
+        let bw = cmp::min(FG_BLOCK_SIZE >> sx, pw - bx);
         if data.overlap_flag && bx != 0 {
             // shift previous offsets left
             for i in 0..rows {
@@ -1011,8 +1011,8 @@ unsafe fn fgy_32x32xn_neon<BD: BitDepth>(
 
     let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
 
-    // process this row in BLOCK_SIZE^2 blocks
-    for bx in (0..pw).step_by(BLOCK_SIZE) {
+    // process this row in FG_BLOCK_SIZE^2 blocks
+    for bx in (0..pw).step_by(FG_BLOCK_SIZE) {
         if data.overlap_flag && bx != 0 {
             // shift previous offsets left
             for i in 0..rows {
@@ -1128,8 +1128,8 @@ unsafe fn fguv_32x32xn_neon<BD: BitDepth, const NM: usize, const IS_SX: bool, co
 
     let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
 
-    // process this row in BLOCK_SIZE^2 blocks (subsampled)
-    for bx in (0..pw).step_by(BLOCK_SIZE >> sx) {
+    // process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
+    for bx in (0..pw).step_by(FG_BLOCK_SIZE >> sx) {
         if data.overlap_flag && bx != 0 {
             // shift previous offsets left
             for i in 0..rows {
diff --git a/src/filmgrain_tmpl.c b/src/filmgrain_tmpl.c
diff --git a/src/thread_task.c b/src/thread_task.c
diff --git a/src/thread_task.rs b/src/thread_task.rs