Skip to content

Commit e4fd312

Browse files
committed
fix 128bit cttz intrinsic UB
1 parent 485b53c commit e4fd312

File tree

1 file changed

+71
-75
lines changed

1 file changed

+71
-75
lines changed

src/intrinsic/mod.rs

Lines changed: 71 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -407,41 +407,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
407407
Some((width, signed)) => match name {
408408
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
409409

410-
sym::cttz => {
411-
let func = self.current_func();
412-
let then_block = func.new_block("then");
413-
let else_block = func.new_block("else");
414-
let after_block = func.new_block("after");
415-
416-
let arg = args[0].immediate();
417-
let result = func.new_local(None, self.u32_type, "zeros");
418-
let zero = self.cx.gcc_zero(arg.get_type());
419-
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
420-
self.llbb().end_with_conditional(None, cond, then_block, else_block);
421-
422-
let zero_result = self.cx.gcc_uint(self.u32_type, width);
423-
then_block.add_assignment(None, result, zero_result);
424-
then_block.end_with_jump(None, after_block);
425-
426-
// NOTE: since jumps were added in a place
427-
// count_leading_zeroes() does not expect, the current block
428-
// in the state need to be updated.
429-
self.switch_to_block(else_block);
430-
431-
let zeros = self.count_trailing_zeroes(width, arg);
432-
self.llbb().add_assignment(None, result, zeros);
433-
self.llbb().end_with_jump(None, after_block);
434-
435-
// NOTE: since jumps were added in a place rustc does not
436-
// expect, the current block in the state need to be updated.
437-
self.switch_to_block(after_block);
438-
439-
result.to_rvalue()
440-
}
441410
sym::ctlz_nonzero => {
442411
self.count_leading_zeroes_nonzero(width, args[0].immediate())
443412
}
444-
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
413+
sym::cttz => self.count_trailing_zeroes(width, args[0].immediate()),
414+
sym::cttz_nonzero => {
415+
self.count_trailing_zeroes_nonzero(width, args[0].immediate())
416+
}
445417
sym::ctpop => self.pop_count(args[0].immediate()),
446418
sym::bswap => {
447419
if width == 8 {
@@ -983,16 +955,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
983955
self.context.new_cast(self.location, res, result_type)
984956
}
985957

986-
fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
987-
let arg_type = arg.get_type();
958+
fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
959+
// If arg is 0, return width directly; otherwise call count_trailing_zeroes_nonzero to compute the trailing zeros.
960+
let func = self.current_func();
961+
let then_block = func.new_block("then");
962+
let else_block = func.new_block("else");
963+
let after_block = func.new_block("after");
964+
965+
let result = func.new_local(None, self.u32_type, "zeros");
966+
let zero = self.cx.gcc_zero(arg.get_type());
967+
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
968+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
969+
970+
let zero_result = self.cx.gcc_uint(self.u32_type, width);
971+
then_block.add_assignment(None, result, zero_result);
972+
then_block.end_with_jump(None, after_block);
973+
974+
// NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect,
975+
// the current block in the state needs to be updated.
976+
self.switch_to_block(else_block);
977+
978+
let zeros = self.count_trailing_zeroes_nonzero(width, arg);
979+
self.llbb().add_assignment(None, result, zeros);
980+
self.llbb().end_with_jump(None, after_block);
981+
982+
// NOTE: since jumps were added in a place rustc does not
983+
// expect, the current block in the state needs to be updated.
984+
self.switch_to_block(after_block);
985+
986+
result.to_rvalue()
987+
}
988+
989+
fn count_trailing_zeroes_nonzero(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
988990
let result_type = self.u32_type;
991+
let mut arg_type = arg.get_type();
989992
let arg = if arg_type.is_signed(self.cx) {
990-
let new_type = arg_type.to_unsigned(self.cx);
991-
self.gcc_int_cast(arg, new_type)
993+
arg_type = arg_type.to_unsigned(self.cx);
994+
self.gcc_int_cast(arg, arg_type)
992995
} else {
993996
arg
994997
};
995-
let arg_type = arg.get_type();
996998
let (count_trailing_zeroes, expected_type) =
997999
// TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
9981000
// instead of using is_uint().
@@ -1007,50 +1009,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
10071009
("__builtin_ctzll", self.cx.ulonglong_type)
10081010
}
10091011
else if arg_type.is_u128(self.cx) {
1010-
// Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
1011-
let array_type = self.context.new_array_type(None, arg_type, 3);
1012+
// arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0
1013+
// __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
1014+
// else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0
1015+
// because arg is not 0.
1016+
10121017
let result = self.current_func()
1013-
.new_local(None, array_type, "count_loading_zeroes_results");
1018+
.new_local(None, result_type, "count_trailing_zeroes_results");
1019+
1020+
let ctlz_then_block = self.current_func().new_block("cttz_then");
1021+
let ctlz_else_block = self.current_func().new_block("cttz_else");
1022+
let ctlz_after_block = self.current_func().new_block("cttz_after");
1023+
let ctzll = self.context.get_builtin_function("__builtin_ctzll");
10141024

1015-
let sixty_four = self.gcc_int(arg_type, 64);
1016-
let shift = self.gcc_lshr(arg, sixty_four);
1017-
let high = self.gcc_int_cast(shift, self.u64_type);
10181025
let low = self.gcc_int_cast(arg, self.u64_type);
1026+
let sixty_four = self.const_uint(arg_type, 64);
1027+
let shift = self.lshr(arg, sixty_four);
1028+
let high = self.gcc_int_cast(shift, self.u64_type);
1029+
let zero_low = self.const_uint(low.get_type(), 0);
1030+
let cond = self.gcc_icmp(IntPredicate::IntNE, low, zero_low);
1031+
self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
1032+
self.switch_to_block(ctlz_then_block);
10191033

1020-
let zero = self.context.new_rvalue_zero(self.usize_type);
1021-
let one = self.context.new_rvalue_one(self.usize_type);
1022-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
1034+
let result_128 =
1035+
self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), result_type);
10231036

1024-
let ctzll = self.context.get_builtin_function("__builtin_ctzll");
1037+
ctlz_then_block.add_assignment(self.location, result, result_128);
1038+
ctlz_then_block.end_with_jump(self.location, ctlz_after_block);
10251039

1026-
let first_elem = self.context.new_array_access(self.location, result, zero);
1027-
let first_value = self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[low]), arg_type);
1028-
self.llbb()
1029-
.add_assignment(self.location, first_elem, first_value);
1030-
1031-
let second_elem = self.context.new_array_access(self.location, result, one);
1032-
let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[high]), arg_type), sixty_four);
1033-
self.llbb()
1034-
.add_assignment(self.location, second_elem, second_value);
1035-
1036-
let third_elem = self.context.new_array_access(self.location, result, two);
1037-
let third_value = self.gcc_int(arg_type, 128);
1038-
self.llbb()
1039-
.add_assignment(self.location, third_elem, third_value);
1040-
1041-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
1042-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
1043-
let not_low_and_not_high = not_low & not_high;
1044-
let index = not_low + not_low_and_not_high;
1045-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
1046-
// gcc.
1047-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
1048-
// compilation stage.
1049-
let index = self.context.new_cast(self.location, index, self.i32_type);
1050-
1051-
let res = self.context.new_array_access(self.location, result, index);
1052-
1053-
return self.gcc_int_cast(res.to_rvalue(), result_type);
1040+
self.switch_to_block(ctlz_else_block);
1041+
let high_trailing_zeroes =
1042+
self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), result_type);
1043+
1044+
let sixty_four_result_type = self.const_uint(result_type, 64);
1045+
let result_128 = self.add(high_trailing_zeroes, sixty_four_result_type);
1046+
ctlz_else_block.add_assignment(self.location, result, result_128);
1047+
ctlz_else_block.end_with_jump(self.location, ctlz_after_block);
1048+
self.switch_to_block(ctlz_after_block);
1049+
return result.to_rvalue();
10541050
}
10551051
else {
10561052
let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll");

0 commit comments

Comments
 (0)