@@ -82,6 +82,7 @@ use crate::src::tables::dav1d_txtp_from_uvmode;
82
82
use crate :: src:: tables:: TxfmInfo ;
83
83
use crate :: src:: wedge:: dav1d_ii_masks;
84
84
use crate :: src:: wedge:: dav1d_wedge_masks;
85
+ use assert_matches:: debug_assert_matches;
85
86
use libc:: intptr_t;
86
87
use std:: array;
87
88
use std:: cmp;
@@ -467,30 +468,46 @@ fn get_dc_sign_ctx(tx: TxfmSize, a: &[u8], l: &[u8]) -> c_uint {
467
468
/// Derives a `u8` context value from the entries of `levels` that neighbour
/// its start, and writes a partial neighbour sum to `*hi_mag`.
///
/// NOTE(review): this span is a rendered diff, not plain Rust. Lines starting
/// with `-` are the removed version, lines starting with `+` are the added
/// version, unprefixed code lines are shared by both, and the bare numbers are
/// the diff view's old/new line numbers. Everything below describes the added
/// (`+`) version.
///
/// * `levels` - read at `levels[y * stride + x]` for a handful of fixed
///   `(y, x)` offsets. The AC-loop call sites visible in this diff pass
///   `&mut levels[x * stride + y..]`; the DC call sites pass `levels` whole
///   with `x = 0, y = 0`.
/// * `tx_class` - only inspected by `debug_assert_matches!`: `TwoD` is
///   expected when `ctx_offsets` is `Some`, `H` or `V` when it is `None`.
/// * `hi_mag` - out-parameter; receives the sum of the first three neighbours
///   (`(0,1) + (1,0) + (1,1)` for `Some`, `(0,1) + (1,0) + (0,2)` for `None`).
/// * `ctx_offsets` - 5x5 table read at `[min(y, 4)][min(x, 4)]`; its presence
///   selects which neighbour pattern is summed.
/// * `x`, `y` - used for the table lookup (`Some`); in the `None` arm only `y`
///   is used, to pick a base offset of `26`, `31` or `36`.
/// * `stride` - row pitch of `levels`; widened to `usize` before indexing.
///
/// Returns the base offset plus `4` when the five-neighbour sum exceeds 512,
/// otherwise plus `(sum + 64) >> 7`.
///
/// # Panics
/// Indexing panics if `levels` has no element at index `2 * stride + 1`
/// (`Some` arm) or `stride + 4` (`None` arm); those reads happen before any
/// other access in the respective arm.
fn get_lo_ctx (
468
469
levels : & [ u8 ] ,
469
470
tx_class : TxClass ,
470
- hi_mag : & mut c_uint ,
471
+ hi_mag : & mut u32 ,
471
472
ctx_offsets : Option < & [ [ u8 ; 5 ] ; 5 ] > ,
472
- x : usize ,
473
- y : usize ,
474
- stride : usize ,
475
- ) -> usize {
476
- let level = |y, x| levels[ y * stride + x] as usize ;
477
-
478
- let mut mag = level ( 0 , 1 ) + level ( 1 , 0 ) ;
479
- let offset = match tx_class {
480
- TxClass :: TwoD => {
473
+ x : u32 ,
474
+ y : u32 ,
475
+ stride : u8 ,
476
+ ) -> u8 {
477
+ let stride = stride as usize ;
478
// `level(y, x)` reads row `y`, column `x` relative to the start of `levels`.
+ let level = |y, x| levels[ y * stride + x] as u32 ;
479
+
480
+ // Note that the first `mag` initialization is moved inside the `match`
481
+ // so that the different bounds checks can be done inside the `match`,
482
+ // as putting them outside the `match` in an identical one trips up LLVM.
483
+ let mut mag;
484
+ let offset;
485
// The added version dispatches on `ctx_offsets` rather than on `tx_class`,
// so the table no longer needs an `unwrap()`.
+ match ctx_offsets {
486
+ Some ( ctx_offsets) => {
487
// The index read here is `2 * stride + 1`. That is >= every index read
// below in this arm (1, stride, stride + 1, 2, 2 * stride) whenever
// `stride >= 1` -- presumably always true, since the visible caller
// computes `stride` as `4 * sh`; TODO confirm for all call sites.
+ level ( 2 , 1 ) ; // Bounds check all at once.
488
+ mag = level ( 0 , 1 ) + level ( 1 , 0 ) ;
489
+ debug_assert_matches ! ( tx_class, TxClass :: TwoD ) ;
481
490
mag += level ( 1 , 1 ) ;
482
- * hi_mag = mag as c_uint ;
491
// `hi_mag` gets the three-neighbour sum; two more neighbours are added
// afterwards for the value used in the return expression.
+ * hi_mag = mag;
483
492
mag += level ( 0 , 2 ) + level ( 2 , 0 ) ;
484
- ctx_offsets. unwrap ( ) [ cmp:: min ( y, 4 ) ] [ cmp:: min ( x, 4 ) ] as usize
493
// Both coordinates are clamped to 4 so they stay inside the 5x5 table.
+ offset = ctx_offsets[ cmp:: min ( y as usize , 4 ) ] [ cmp:: min ( x as usize , 4 ) ] ;
485
494
}
486
- TxClass :: H | TxClass :: V => {
495
+ None => {
496
+ debug_assert_matches ! ( tx_class, TxClass :: H | TxClass :: V ) ;
497
// The index read here is `stride + 4`, which is >= every index read
// below in this arm (1, stride, 2, 3, 4) for any `stride`.
+ level ( 1 , 4 ) ; // Bounds check all at once.
498
+ mag = level ( 0 , 1 ) + level ( 1 , 0 ) ;
487
499
mag += level ( 0 , 2 ) ;
488
- * hi_mag = mag as c_uint ;
500
+ * hi_mag = mag;
489
501
mag += level ( 0 , 3 ) + level ( 0 , 4 ) ;
490
- 26 + if y > 1 { 10 } else { y * 5 }
502
// `y as u8 * 5` is only evaluated when `y <= 1`, so it is 0 or 5 and the
// narrowing cast cannot truncate.
+ offset = 26 + if y > 1 { 10 } else { y as u8 * 5 } ;
503
+ }
504
+ }
505
// In the `else` branch `mag <= 512`, so `(mag + 64) >> 7` is at most 4 and
// the `as u8` cast cannot truncate.
+ offset
506
+ + if mag > 512 {
507
+ 4
508
+ } else {
509
+ ( ( mag + 64 ) >> 7 ) as u8
491
510
}
492
- } ;
493
- offset + if mag > 512 { 4 } else { ( mag + 64 ) >> 7 }
494
511
}
495
512
496
513
fn decode_coefs < BD : BitDepth > (
@@ -709,9 +726,9 @@ fn decode_coefs<BD: BitDepth>(
709
726
let sh = cmp:: min ( t_dim. h , 8 ) ;
710
727
711
728
// eob
712
- let mut ctx: c_uint = 1
713
- + ( eob > sw as c_int * sh as c_int * 2 ) as c_uint
714
- + ( eob > sw as c_int * sh as c_int * 4 ) as c_uint ;
729
+ let mut ctx = 1
730
+ + ( eob > sw as c_int * sh as c_int * 2 ) as u8
731
+ + ( eob > sw as c_int * sh as c_int * 4 ) as u8 ;
715
732
let eob_tok =
716
733
rav1d_msac_decode_symbol_adapt4 ( & mut ts_c. msac , & mut eob_cdf[ ctx as usize ] , 2 ) as c_int ;
717
734
let mut tok = eob_tok + 1 ;
@@ -727,7 +744,7 @@ fn decode_coefs<BD: BitDepth>(
727
744
[ nonsquare_tx. wrapping_add ( tx as c_uint & nonsquare_tx) as usize ] ,
728
745
) ;
729
746
scan = dav1d_scans[ tx as usize ] ;
730
- let stride = 4 * sh as usize ;
747
+ let stride = 4 * sh;
731
748
let shift: c_uint = if t_dim. lh < 4 {
732
749
t_dim. lh as c_uint + 2
733
750
} else {
@@ -737,7 +754,7 @@ fn decode_coefs<BD: BitDepth>(
737
754
let mask: c_uint = 4 * sh as c_uint - 1 ;
738
755
// Optimizes better than `.fill(0)`,
739
756
// which doesn't elide the bounds check, inline, or vectorize.
740
- for i in 0 ..stride * ( 4 * sw as usize + 2 ) {
757
+ for i in 0 ..stride as usize * ( 4 * sw as usize + 2 ) {
741
758
levels[ i] = 0 ;
742
759
}
743
760
let mut x: c_uint ;
@@ -793,7 +810,7 @@ fn decode_coefs<BD: BitDepth>(
793
810
}
794
811
}
795
812
cf. set :: < BD > ( f, t_cf, rc as usize , ( tok << 11 ) . as_ :: < BD :: Coef > ( ) ) ;
796
- levels[ x as usize * stride + y as usize ] = level_tok as u8 ;
813
+ levels[ x as usize * stride as usize + y as usize ] = level_tok as u8 ;
797
814
let mut i = eob - 1 ;
798
815
while i > 0 {
799
816
// ac
@@ -816,16 +833,8 @@ fn decode_coefs<BD: BitDepth>(
816
833
}
817
834
}
818
835
assert ! ( x < 32 && y < 32 ) ;
819
- let level = & mut levels[ x as usize * stride + y as usize ..] ;
820
- ctx = get_lo_ctx (
821
- level,
822
- tx_class,
823
- & mut mag,
824
- lo_ctx_offsets,
825
- x as usize ,
826
- y as usize ,
827
- stride,
828
- ) as c_uint ;
836
+ let level = & mut levels[ x as usize * stride as usize + y as usize ..] ;
837
+ ctx = get_lo_ctx ( level, tx_class, & mut mag, lo_ctx_offsets, x, y, stride) ;
829
838
if tx_class == TxClass :: TwoD {
830
839
y |= x;
831
840
}
@@ -842,16 +851,11 @@ fn decode_coefs<BD: BitDepth>(
842
851
}
843
852
if tok == 3 {
844
853
mag &= 63 ;
845
- ctx = ( ( if y > ( tx_class == TxClass :: TwoD ) as c_uint {
854
+ ctx = if y > ( tx_class == TxClass :: TwoD ) as c_uint {
846
855
14
847
856
} else {
848
857
7
849
- } ) as c_uint )
850
- . wrapping_add ( if mag > 12 {
851
- 6
852
- } else {
853
- mag. wrapping_add ( 1 ) >> 1
854
- } ) ;
858
+ } + if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
855
859
tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
856
860
as c_int ;
857
861
if dbg {
@@ -891,7 +895,7 @@ fn decode_coefs<BD: BitDepth>(
891
895
ctx = if tx_class == TxClass :: TwoD {
892
896
0
893
897
} else {
894
- get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride) as c_uint
898
+ get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride)
895
899
} ;
896
900
dc_tok =
897
901
rav1d_msac_decode_symbol_adapt4 ( & mut ts_c. msac , & mut lo_cdf[ ctx as usize ] , 3 )
@@ -904,16 +908,12 @@ fn decode_coefs<BD: BitDepth>(
904
908
}
905
909
if dc_tok == 3 {
906
910
if tx_class == TxClass :: TwoD {
907
- mag = levels[ 0 * stride + 1 ] as c_uint
908
- + levels[ 1 * stride + 0 ] as c_uint
909
- + levels[ 1 * stride + 1 ] as c_uint ;
911
+ mag = levels[ 0 * stride as usize + 1 ] as c_uint
912
+ + levels[ 1 * stride as usize + 0 ] as c_uint
913
+ + levels[ 1 * stride as usize + 1 ] as c_uint ;
910
914
}
911
915
mag &= 63 ;
912
- ctx = if mag > 12 {
913
- 6
914
- } else {
915
- mag. wrapping_add ( 1 ) >> 1
916
- } ;
916
+ ctx = if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
917
917
dc_tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
918
918
as c_uint ;
919
919
if dbg {
@@ -935,7 +935,7 @@ fn decode_coefs<BD: BitDepth>(
935
935
let mask: c_uint = 4 * sh as c_uint - 1 ;
936
936
// Optimizes better than `.fill(0)`,
937
937
// which doesn't elide the bounds check, inline, or vectorize.
938
- for i in 0 ..stride * ( 4 * sh as usize + 2 ) {
938
+ for i in 0 ..stride as usize * ( 4 * sh as usize + 2 ) {
939
939
levels[ i] = 0 ;
940
940
}
941
941
let mut x: c_uint ;
@@ -990,7 +990,7 @@ fn decode_coefs<BD: BitDepth>(
990
990
}
991
991
}
992
992
cf. set :: < BD > ( f, t_cf, rc as usize , ( tok << 11 ) . as_ :: < BD :: Coef > ( ) ) ;
993
- levels[ x as usize * stride + y as usize ] = level_tok as u8 ;
993
+ levels[ x as usize * stride as usize + y as usize ] = level_tok as u8 ;
994
994
let mut i = eob - 1 ;
995
995
while i > 0 {
996
996
let rc_i: c_uint ;
@@ -1012,16 +1012,8 @@ fn decode_coefs<BD: BitDepth>(
1012
1012
}
1013
1013
}
1014
1014
assert ! ( x < 32 && y < 32 ) ;
1015
- let level = & mut levels[ x as usize * stride + y as usize ..] ;
1016
- ctx = get_lo_ctx (
1017
- level,
1018
- tx_class,
1019
- & mut mag,
1020
- lo_ctx_offsets,
1021
- x as usize ,
1022
- y as usize ,
1023
- stride,
1024
- ) as c_uint ;
1015
+ let level = & mut levels[ x as usize * stride as usize + y as usize ..] ;
1016
+ ctx = get_lo_ctx ( level, tx_class, & mut mag, lo_ctx_offsets, x, y, stride) ;
1025
1017
if tx_class == TxClass :: TwoD {
1026
1018
y |= x;
1027
1019
}
@@ -1038,16 +1030,11 @@ fn decode_coefs<BD: BitDepth>(
1038
1030
}
1039
1031
if tok == 3 {
1040
1032
mag &= 63 ;
1041
- ctx = ( ( if y > ( tx_class == TxClass :: TwoD ) as c_uint {
1033
+ ctx = if y > ( tx_class == TxClass :: TwoD ) as c_uint {
1042
1034
14
1043
1035
} else {
1044
1036
7
1045
- } ) as c_uint )
1046
- . wrapping_add ( if mag > 12 {
1047
- 6
1048
- } else {
1049
- mag. wrapping_add ( 1 ) >> 1
1050
- } ) ;
1037
+ } + if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
1051
1038
tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
1052
1039
as c_int ;
1053
1040
if dbg {
@@ -1084,7 +1071,7 @@ fn decode_coefs<BD: BitDepth>(
1084
1071
ctx = if tx_class == TxClass :: TwoD {
1085
1072
0
1086
1073
} else {
1087
- get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride) as c_uint
1074
+ get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride)
1088
1075
} ;
1089
1076
dc_tok =
1090
1077
rav1d_msac_decode_symbol_adapt4 ( & mut ts_c. msac , & mut lo_cdf[ ctx as usize ] , 3 )
@@ -1097,16 +1084,12 @@ fn decode_coefs<BD: BitDepth>(
1097
1084
}
1098
1085
if dc_tok == 3 {
1099
1086
if tx_class == TxClass :: TwoD {
1100
- mag = levels[ 0 * stride + 1 ] as c_uint
1101
- + levels[ 1 * stride + 0 ] as c_uint
1102
- + levels[ 1 * stride + 1 ] as c_uint ;
1087
+ mag = levels[ 0 * stride as usize + 1 ] as c_uint
1088
+ + levels[ 1 * stride as usize + 0 ] as c_uint
1089
+ + levels[ 1 * stride as usize + 1 ] as c_uint ;
1103
1090
}
1104
1091
mag &= 63 ;
1105
- ctx = if mag > 12 {
1106
- 6
1107
- } else {
1108
- mag. wrapping_add ( 1 ) >> 1
1109
- } ;
1092
+ ctx = if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
1110
1093
dc_tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
1111
1094
as c_uint ;
1112
1095
if dbg {
@@ -1128,7 +1111,7 @@ fn decode_coefs<BD: BitDepth>(
1128
1111
let mask: c_uint = 4 * sw as c_uint - 1 ;
1129
1112
// Optimizes better than `.fill(0)`,
1130
1113
// which doesn't elide the bounds check, inline, or vectorize.
1131
- for i in 0 ..stride * ( 4 * sw as usize + 2 ) {
1114
+ for i in 0 ..stride as usize * ( 4 * sw as usize + 2 ) {
1132
1115
levels[ i] = 0 ;
1133
1116
}
1134
1117
let mut x: c_uint ;
@@ -1183,7 +1166,7 @@ fn decode_coefs<BD: BitDepth>(
1183
1166
}
1184
1167
}
1185
1168
cf. set :: < BD > ( f, t_cf, rc as usize , ( tok << 11 ) . as_ :: < BD :: Coef > ( ) ) ;
1186
- levels[ x as usize * stride + y as usize ] = level_tok as u8 ;
1169
+ levels[ x as usize * stride as usize + y as usize ] = level_tok as u8 ;
1187
1170
let mut i = eob - 1 ;
1188
1171
while i > 0 {
1189
1172
let rc_i: c_uint ;
@@ -1205,16 +1188,8 @@ fn decode_coefs<BD: BitDepth>(
1205
1188
}
1206
1189
}
1207
1190
assert ! ( x < 32 && y < 32 ) ;
1208
- let level = & mut levels[ x as usize * stride + y as usize ..] ;
1209
- ctx = get_lo_ctx (
1210
- level,
1211
- tx_class,
1212
- & mut mag,
1213
- lo_ctx_offsets,
1214
- x as usize ,
1215
- y as usize ,
1216
- stride,
1217
- ) as c_uint ;
1191
+ let level = & mut levels[ x as usize * stride as usize + y as usize ..] ;
1192
+ ctx = get_lo_ctx ( level, tx_class, & mut mag, lo_ctx_offsets, x, y, stride) ;
1218
1193
if tx_class == TxClass :: TwoD {
1219
1194
y |= x;
1220
1195
}
@@ -1231,16 +1206,11 @@ fn decode_coefs<BD: BitDepth>(
1231
1206
}
1232
1207
if tok == 3 {
1233
1208
mag &= 63 ;
1234
- ctx = ( ( if y > ( tx_class == TxClass :: TwoD ) as c_uint {
1209
+ ctx = if y > ( tx_class == TxClass :: TwoD ) as c_uint {
1235
1210
14
1236
1211
} else {
1237
1212
7
1238
- } ) as c_uint )
1239
- . wrapping_add ( if mag > 12 {
1240
- 6
1241
- } else {
1242
- mag. wrapping_add ( 1 ) >> 1
1243
- } ) ;
1213
+ } + if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
1244
1214
tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
1245
1215
as c_int ;
1246
1216
if dbg {
@@ -1277,7 +1247,7 @@ fn decode_coefs<BD: BitDepth>(
1277
1247
ctx = if tx_class == TxClass :: TwoD {
1278
1248
0
1279
1249
} else {
1280
- get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride) as c_uint
1250
+ get_lo_ctx ( levels, tx_class, & mut mag, lo_ctx_offsets, 0 , 0 , stride)
1281
1251
} ;
1282
1252
dc_tok =
1283
1253
rav1d_msac_decode_symbol_adapt4 ( & mut ts_c. msac , & mut lo_cdf[ ctx as usize ] , 3 )
@@ -1290,16 +1260,12 @@ fn decode_coefs<BD: BitDepth>(
1290
1260
}
1291
1261
if dc_tok == 3 {
1292
1262
if tx_class == TxClass :: TwoD {
1293
- mag = levels[ 0 * stride + 1 ] as c_uint
1294
- + levels[ 1 * stride + 0 ] as c_uint
1295
- + levels[ 1 * stride + 1 ] as c_uint ;
1263
+ mag = levels[ 0 * stride as usize + 1 ] as c_uint
1264
+ + levels[ 1 * stride as usize + 0 ] as c_uint
1265
+ + levels[ 1 * stride as usize + 1 ] as c_uint ;
1296
1266
}
1297
1267
mag &= 63 ;
1298
- ctx = if mag > 12 {
1299
- 6
1300
- } else {
1301
- mag. wrapping_add ( 1 ) >> 1
1302
- } ;
1268
+ ctx = if mag > 12 { 6 } else { ( mag as u8 + 1 ) >> 1 } ;
1303
1269
dc_tok = rav1d_msac_decode_hi_tok ( & mut ts_c. msac , & mut hi_cdf[ ctx as usize ] )
1304
1270
as c_uint ;
1305
1271
if dbg {
0 commit comments