Skip to content

Commit dac2b82

Browse files
authored
Backport renaming of BLOCK_SIZE to FG_BLOCK_SIZE from dav1d 1.3.0 (#1256)
Also consistently use the named constant instead of magic numbers.
2 parents dd4c09c + 7b95735 commit dac2b82

12 files changed

+77
-76
lines changed

src/arm/32/filmgrain.S

+8-8
Original file line numberDiff line numberDiff line change
@@ -1481,17 +1481,17 @@ function fgy_32x32_8bpc_neon, export=1
14811481
calc_offset r6, lr, r6, 0, 0
14821482
add_offset r5, r6, lr, r5, r9
14831483

1484-
add r4, r4, #32 // grain_lut += BLOCK_SIZE * bx
1485-
add r6, r11, r9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1484+
add r4, r4, #32 // grain_lut += FG_BLOCK_SIZE * bx
1485+
add r6, r11, r9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
14861486

14871487
ldr r10, [sp, #120] // type
14881488
adr r11, L(fgy_loop_tbl)
14891489

14901490
tst r10, #1
14911491
ldr r10, [r11, r10, lsl #2]
14921492

1493-
add r8, r8, r9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1494-
add r8, r8, #32 // grain_lut += BLOCK_SIZE * bx
1493+
add r8, r8, r9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1494+
add r8, r8, #32 // grain_lut += FG_BLOCK_SIZE * bx
14951495

14961496
add r11, r11, r10
14971497

@@ -1695,10 +1695,10 @@ function fguv_32x32_\layout\()_8bpc_neon, export=1
16951695
calc_offset r8, r12, r8, \sx, \sy
16961696
add_offset r5, r8, r12, r5, r10
16971697

1698-
add r4, r4, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1699-
add r8, lr, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1700-
add r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1701-
add r11, r11, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1698+
add r4, r4, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
1699+
add r8, lr, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1700+
add r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1701+
add r11, r11, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
17021702

17031703
movrel_local r12, overlap_coeffs_\sx
17041704
ldr lr, [sp, #132] // type

src/arm/32/filmgrain16.S

+8-8
Original file line numberDiff line numberDiff line change
@@ -1353,17 +1353,17 @@ function fgy_32x32_16bpc_neon, export=1
13531353
calc_offset r6, lr, r6, 0, 0
13541354
add_offset r5, r6, lr, r5, r9
13551355

1356-
add r4, r4, #32*2 // grain_lut += BLOCK_SIZE * bx
1357-
add r6, r11, r9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1356+
add r4, r4, #32*2 // grain_lut += FG_BLOCK_SIZE * bx
1357+
add r6, r11, r9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
13581358

13591359
ldr r10, [sp, #120] // type
13601360
adr r11, L(fgy_loop_tbl)
13611361

13621362
tst r10, #1
13631363
ldr r10, [r11, r10, lsl #2]
13641364

1365-
add r8, r8, r9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1366-
add r8, r8, #32*2 // grain_lut += BLOCK_SIZE * bx
1365+
add r8, r8, r9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1366+
add r8, r8, #32*2 // grain_lut += FG_BLOCK_SIZE * bx
13671367

13681368
add r11, r11, r10
13691369

@@ -1651,10 +1651,10 @@ function fguv_32x32_\layout\()_16bpc_neon, export=1
16511651

16521652
vmov.16 d31[3], r7 // overlap y [1]
16531653

1654-
add r4, r4, #2*(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1655-
add r8, lr, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1656-
add r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1657-
add r11, r11, #2*(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1654+
add r4, r4, #2*(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
1655+
add r8, lr, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1656+
add r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1657+
add r11, r11, #2*(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
16581658

16591659
movrel_local r12, overlap_coeffs_\sx
16601660
ldr lr, [sp, #132] // type

src/arm/64/filmgrain.S

+8-8
Original file line numberDiff line numberDiff line change
@@ -1409,14 +1409,14 @@ function fgy_32x32_8bpc_neon, export=1
14091409
ldr w11, [sp, #24] // type
14101410
adr x13, L(fgy_loop_tbl)
14111411

1412-
add x4, x12, #32 // grain_lut += BLOCK_SIZE * bx
1413-
add x6, x14, x9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1412+
add x4, x12, #32 // grain_lut += FG_BLOCK_SIZE * bx
1413+
add x6, x14, x9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
14141414

14151415
tst w11, #1
14161416
ldrh w11, [x13, w11, uxtw #1]
14171417

1418-
add x8, x16, x9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1419-
add x8, x8, #32 // grain_lut += BLOCK_SIZE * bx
1418+
add x8, x16, x9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1419+
add x8, x8, #32 // grain_lut += FG_BLOCK_SIZE * bx
14201420

14211421
sub x11, x13, w11, uxtw
14221422

@@ -1638,10 +1638,10 @@ function fguv_32x32_\layout\()_8bpc_neon, export=1
16381638
add_offset x17, w16, x17, x5, x10
16391639
add_offset x5, w8, x11, x5, x10
16401640

1641-
add x4, x13, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1642-
add x8, x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1643-
add x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1644-
add x11, x11, #(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1641+
add x4, x13, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
1642+
add x8, x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1643+
add x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1644+
add x11, x11, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
16451645

16461646
ldr w13, [sp, #64] // type
16471647

src/arm/64/filmgrain16.S

+8-8
Original file line numberDiff line numberDiff line change
@@ -1308,14 +1308,14 @@ function fgy_32x32_16bpc_neon, export=1
13081308
ldr w11, [sp, #88] // type
13091309
adr x13, L(fgy_loop_tbl)
13101310

1311-
add x4, x12, #32*2 // grain_lut += BLOCK_SIZE * bx
1312-
add x6, x14, x9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1311+
add x4, x12, #32*2 // grain_lut += FG_BLOCK_SIZE * bx
1312+
add x6, x14, x9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
13131313

13141314
tst w11, #1
13151315
ldrh w11, [x13, w11, uxtw #1]
13161316

1317-
add x8, x16, x9, lsl #5 // grain_lut += grain_stride * BLOCK_SIZE * by
1318-
add x8, x8, #32*2 // grain_lut += BLOCK_SIZE * bx
1317+
add x8, x16, x9, lsl #5 // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1318+
add x8, x8, #32*2 // grain_lut += FG_BLOCK_SIZE * bx
13191319

13201320
sub x11, x13, w11, uxtw
13211321

@@ -1581,10 +1581,10 @@ function fguv_32x32_\layout\()_16bpc_neon, export=1
15811581
add_offset x17, w16, x17, x5, x10
15821582
add_offset x5, w8, x11, x5, x10
15831583

1584-
add x4, x13, #2*(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1585-
add x8, x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1586-
add x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * BLOCK_SIZE * by
1587-
add x11, x11, #2*(32 >> \sx) // grain_lut += BLOCK_SIZE * bx
1584+
add x4, x13, #2*(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
1585+
add x8, x15, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1586+
add x11, x17, x10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
1587+
add x11, x11, #2*(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
15881588

15891589
ldr w13, [sp, #112] // type
15901590

src/arm/filmgrain.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ static void fgy_32x32xn_neon(pixel *const dst_row, const pixel *const src_row,
9191

9292
int offsets[2 /* col offset */][2 /* row offset */];
9393

94-
// process this row in BLOCK_SIZE^2 blocks
95-
for (unsigned bx = 0; bx < pw; bx += BLOCK_SIZE) {
94+
// process this row in FG_BLOCK_SIZE^2 blocks
95+
for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
9696

9797
if (data->overlap_flag && bx) {
9898
// shift previous offsets left
@@ -155,8 +155,8 @@ fguv_32x32xn_##nm##_neon(pixel *const dst_row, const pixel *const src_row, \
155155
\
156156
int offsets[2 /* col offset */][2 /* row offset */]; \
157157
\
158-
/* process this row in BLOCK_SIZE^2 blocks (subsampled) */ \
159-
for (unsigned bx = 0; bx < pw; bx += BLOCK_SIZE >> sx) { \
158+
/* process this row in FG_BLOCK_SIZE^2 blocks (subsampled) */ \
159+
for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { \
160160
if (data->overlap_flag && bx) { \
161161
/* shift previous offsets left */ \
162162
for (int i = 0; i < rows; i++) \

src/fg_apply.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::include::dav1d::headers::Rav1dPixelLayout;
77
use crate::include::dav1d::picture::Rav1dPicture;
88
use crate::src::align::ArrayDefault;
99
use crate::src::filmgrain::Rav1dFilmGrainDSPContext;
10-
use crate::src::filmgrain::BLOCK_SIZE;
10+
use crate::src::filmgrain::FG_BLOCK_SIZE;
1111
use crate::src::internal::GrainBD;
1212
use std::cmp;
1313

@@ -146,7 +146,7 @@ pub(crate) fn rav1d_apply_grain_row<BD: BitDepth>(
146146
let bd = BD::from_c(bitdepth_max);
147147

148148
if data.num_y_points != 0 {
149-
let bh = cmp::min(h - row * BLOCK_SIZE, BLOCK_SIZE);
149+
let bh = cmp::min(h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
150150
dsp.fgy_32x32xn.call(
151151
&out_data[0],
152152
&in_data[0],
@@ -164,12 +164,12 @@ pub(crate) fn rav1d_apply_grain_row<BD: BitDepth>(
164164
return;
165165
}
166166

167-
let bh = cmp::min(h - row * BLOCK_SIZE, BLOCK_SIZE) + ss_y >> ss_y;
167+
let bh = cmp::min(h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y >> ss_y;
168168

169169
// extend padding pixels
170170
if out.p.w as usize & ss_x != 0 {
171171
let luma = in_data[0].with_offset::<BD>();
172-
let luma = luma + (row * BLOCK_SIZE) as isize * luma.pixel_stride::<BD>();
172+
let luma = luma + (row * FG_BLOCK_SIZE) as isize * luma.pixel_stride::<BD>();
173173
for y in 0..bh {
174174
let luma = luma + (y as isize * (luma.pixel_stride::<BD>() << ss_y));
175175
let padding = &mut *(luma + (out.p.w as usize - 1)).slice_mut::<BD>(2);
@@ -225,7 +225,7 @@ pub(crate) fn rav1d_apply_grain<BD: BitDepth>(
225225
r#in: &Rav1dPicture,
226226
) {
227227
let mut grain = Default::default();
228-
let rows = out.p.h as usize + 31 >> 5;
228+
let rows = (out.p.h as usize + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
229229

230230
rav1d_prep_grain::<BD>(dsp, out, r#in, &mut grain);
231231
for row in 0..rows {

src/fg_apply_tmpl.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,14 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
173173
const int cpw = (out->p.w + ss_x) >> ss_x;
174174
const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
175175
pixel *const luma_src =
176-
((pixel *) in->data[0]) + row * BLOCK_SIZE * PXSTRIDE(in->stride[0]);
176+
((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
177177
#if BITDEPTH != 8
178178
const int bitdepth_max = (1 << out->p.bpc) - 1;
179179
#endif
180180

181181
if (data->num_y_points) {
182-
const int bh = imin(out->p.h - row * BLOCK_SIZE, BLOCK_SIZE);
183-
dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * BLOCK_SIZE * PXSTRIDE(out->stride[0]),
182+
const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
183+
dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
184184
luma_src, out->stride[0], data,
185185
out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
186186
}
@@ -191,7 +191,7 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
191191
return;
192192
}
193193

194-
const int bh = (imin(out->p.h - row * BLOCK_SIZE, BLOCK_SIZE) + ss_y) >> ss_y;
194+
const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
195195

196196
// extend padding pixels
197197
if (out->p.w & ss_x) {
@@ -202,7 +202,7 @@ void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp,
202202
}
203203
}
204204

205-
const ptrdiff_t uv_off = row * BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
205+
const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
206206
if (data->chroma_scaling_from_luma) {
207207
for (int pl = 0; pl < 2; pl++)
208208
dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
@@ -233,7 +233,7 @@ void bitfn(dav1d_apply_grain)(const Dav1dFilmGrainDSPContext *const dsp,
233233
#else
234234
uint8_t scaling[3][SCALING_SIZE];
235235
#endif
236-
const int rows = (out->p.h + 31) >> 5;
236+
const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
237237

238238
bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
239239
for (int row = 0; row < rows; row++)

src/filmgrain.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
#define GRAIN_WIDTH 82
3636
#define GRAIN_HEIGHT 73
37-
#define BLOCK_SIZE 32
37+
#define FG_BLOCK_SIZE 32
3838
#if !defined(BITDEPTH) || BITDEPTH == 8
3939
#define SCALING_SIZE 256
4040
typedef int8_t entry;

src/filmgrain.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use crate::include::common::bitdepth::bd_fn;
4242
pub const GRAIN_WIDTH: usize = 82;
4343
pub const GRAIN_HEIGHT: usize = 73;
4444

45-
pub const BLOCK_SIZE: usize = 32;
45+
pub const FG_BLOCK_SIZE: usize = 32;
4646

4747
const SUB_GRAIN_WIDTH: usize = 44;
4848
const SUB_GRAIN_HEIGHT: usize = 38;
@@ -123,7 +123,7 @@ impl fgy_32x32xn::Fn {
123123
row_num: usize,
124124
bd: BD,
125125
) {
126-
let row_strides = (row_num * BLOCK_SIZE) as isize;
126+
let row_strides = (row_num * FG_BLOCK_SIZE) as isize;
127127
let dst_row = dst.with_offset::<BD>() + row_strides * dst.pixel_stride::<BD>();
128128
let src_row = src.with_offset::<BD>() + row_strides * src.pixel_stride::<BD>();
129129
let dst_row_ptr = dst_row.as_mut_ptr::<BD>().cast();
@@ -195,7 +195,7 @@ impl fguv_32x32xn::Fn {
195195
bd: BD,
196196
) {
197197
let ss_y = (layout == Rav1dPixelLayoutSubSampled::I420) as usize;
198-
let row_strides = (row_num * BLOCK_SIZE) as isize;
198+
let row_strides = (row_num * FG_BLOCK_SIZE) as isize;
199199
let dst_row = dst.with_offset::<BD>() + (row_strides * dst.pixel_stride::<BD>() >> ss_y);
200200
let src_row = src.with_offset::<BD>() + (row_strides * src.pixel_stride::<BD>() >> ss_y);
201201
let dst_row_ptr = dst_row.as_mut_ptr::<BD>().cast();
@@ -572,7 +572,7 @@ fn sample_lut<BD: BitDepth>(
572572
let randval = offsets[bx][by] as usize;
573573
let offx = 3 + (2 >> subx) * (3 + (randval >> 4));
574574
let offy = 3 + (2 >> suby) * (3 + (randval & ((1 << 4) - 1)));
575-
grain_lut[offy + y + (BLOCK_SIZE >> suby) * by][offx + x + (BLOCK_SIZE >> subx) * bx]
575+
grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by][offx + x + (FG_BLOCK_SIZE >> subx) * bx]
576576
.as_::<i32>()
577577
}
578578

@@ -638,13 +638,13 @@ fn fgy_32x32xn_rust<BD: BitDepth>(
638638

639639
let mut seed = row_seed(rows, row_num, data);
640640

641-
assert!(dst_row.stride() % (BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
641+
assert!(dst_row.stride() % (FG_BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
642642

643643
let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
644644

645-
// process this row in BLOCK_SIZE^2 blocks
646-
for bx in (0..pw).step_by(BLOCK_SIZE) {
647-
let bw = cmp::min(BLOCK_SIZE, pw - bx);
645+
// process this row in FG_BLOCK_SIZE^2 blocks
646+
for bx in (0..pw).step_by(FG_BLOCK_SIZE) {
647+
let bw = cmp::min(FG_BLOCK_SIZE, pw - bx);
648648

649649
if data.overlap_flag && bx != 0 {
650650
// shift previous offsets left
@@ -780,13 +780,13 @@ fn fguv_32x32xn_rust<BD: BitDepth>(
780780

781781
let mut seed = row_seed(rows, row_num, data);
782782

783-
assert!(dst_row.stride() % (BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
783+
assert!(dst_row.stride() % (FG_BLOCK_SIZE * mem::size_of::<BD::Pixel>()) as isize == 0);
784784

785785
let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
786786

787-
// process this row in BLOCK_SIZE^2 blocks (subsampled)
788-
for bx in (0..pw).step_by(BLOCK_SIZE >> sx) {
789-
let bw = cmp::min(BLOCK_SIZE >> sx, pw - bx);
787+
// process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
788+
for bx in (0..pw).step_by(FG_BLOCK_SIZE >> sx) {
789+
let bw = cmp::min(FG_BLOCK_SIZE >> sx, pw - bx);
790790
if data.overlap_flag && bx != 0 {
791791
// shift previous offsets left
792792
for i in 0..rows {
@@ -1011,8 +1011,8 @@ unsafe fn fgy_32x32xn_neon<BD: BitDepth>(
10111011

10121012
let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
10131013

1014-
// process this row in BLOCK_SIZE^2 blocks
1015-
for bx in (0..pw).step_by(BLOCK_SIZE) {
1014+
// process this row in FG_BLOCK_SIZE^2 blocks
1015+
for bx in (0..pw).step_by(FG_BLOCK_SIZE) {
10161016
if data.overlap_flag && bx != 0 {
10171017
// shift previous offsets left
10181018
for i in 0..rows {
@@ -1128,8 +1128,8 @@ unsafe fn fguv_32x32xn_neon<BD: BitDepth, const NM: usize, const IS_SX: bool, co
11281128

11291129
let mut offsets: [[c_int; 2]; 2] = [[0; 2 /* row offset */]; 2 /* col offset */];
11301130

1131-
// process this row in BLOCK_SIZE^2 blocks (subsampled)
1132-
for bx in (0..pw).step_by(BLOCK_SIZE >> sx) {
1131+
// process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
1132+
for bx in (0..pw).step_by(FG_BLOCK_SIZE >> sx) {
11331133
if data.overlap_flag && bx != 0 {
11341134
// shift previous offsets left
11351135
for i in 0..rows {

0 commit comments

Comments
 (0)