Skip to content

Commit 485b53c

Browse files
committed
fix 128bit ctlz intrinsic UB
1 parent db67f59 commit 485b53c

File tree

1 file changed

+69
-45
lines changed

1 file changed

+69
-45
lines changed

src/intrinsic/mod.rs

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
405405
| sym::saturating_sub => {
406406
match int_type_width_signed(args[0].layout.ty, self) {
407407
Some((width, signed)) => match name {
408-
sym::ctlz | sym::cttz => {
408+
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
409+
410+
sym::cttz => {
409411
let func = self.current_func();
410412
let then_block = func.new_block("then");
411413
let else_block = func.new_block("else");
@@ -426,11 +428,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
426428
// in the state need to be updated.
427429
self.switch_to_block(else_block);
428430

429-
let zeros = match name {
430-
sym::ctlz => self.count_leading_zeroes(width, arg),
431-
sym::cttz => self.count_trailing_zeroes(width, arg),
432-
_ => unreachable!(),
433-
};
431+
let zeros = self.count_trailing_zeroes(width, arg);
434432
self.llbb().add_assignment(None, result, zeros);
435433
self.llbb().end_with_jump(None, after_block);
436434

@@ -440,7 +438,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
440438

441439
result.to_rvalue()
442440
}
443-
sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
441+
sym::ctlz_nonzero => {
442+
self.count_leading_zeroes_nonzero(width, args[0].immediate())
443+
}
444444
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
445445
sym::ctpop => self.pop_count(args[0].immediate()),
446446
sym::bswap => {
@@ -877,16 +877,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
877877
}
878878

879879
fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
880+
// if arg is 0, early return 0, else call count_leading_zeroes_nonzero to compute leading zeros
881+
let func = self.current_func();
882+
let then_block = func.new_block("then");
883+
let else_block = func.new_block("else");
884+
let after_block = func.new_block("after");
885+
886+
let result = func.new_local(None, self.u32_type, "zeros");
887+
let zero = self.cx.gcc_zero(arg.get_type());
888+
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
889+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
890+
891+
let zero_result = self.cx.gcc_uint(self.u32_type, width);
892+
then_block.add_assignment(None, result, zero_result);
893+
then_block.end_with_jump(None, after_block);
894+
895+
// NOTE: since jumps were added in a place count_leading_zeroes_nonzero() does not expect,
896+
// the current block in the state need to be updated.
897+
self.switch_to_block(else_block);
898+
899+
let zeros = self.count_leading_zeroes_nonzero(width, arg);
900+
self.llbb().add_assignment(None, result, zeros);
901+
self.llbb().end_with_jump(None, after_block);
902+
903+
// NOTE: since jumps were added in a place rustc does not
904+
// expect, the current block in the state need to be updated.
905+
self.switch_to_block(after_block);
906+
907+
result.to_rvalue()
908+
}
909+
910+
fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
880911
// TODO(antoyo): use width?
881-
let arg_type = arg.get_type();
882912
let result_type = self.u32_type;
913+
let mut arg_type = arg.get_type();
883914
let arg = if arg_type.is_signed(self.cx) {
884-
let new_type = arg_type.to_unsigned(self.cx);
885-
self.gcc_int_cast(arg, new_type)
915+
arg_type = arg_type.to_unsigned(self.cx);
916+
self.gcc_int_cast(arg, arg_type)
886917
} else {
887918
arg
888919
};
889-
let arg_type = arg.get_type();
890920
let count_leading_zeroes =
891921
// TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
892922
// instead of using is_uint().
@@ -900,51 +930,45 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
900930
"__builtin_clzll"
901931
}
902932
else if width == 128 {
903-
// Algorithm from: https://stackoverflow.com/a/28433850/389119
904-
let array_type = self.context.new_array_type(None, arg_type, 3);
933+
// arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0
934+
// __buildin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
935+
// else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0
936+
// because arg is not 0.
937+
905938
let result = self.current_func()
906-
.new_local(None, array_type, "count_loading_zeroes_results");
939+
.new_local(None, result_type, "count_leading_zeroes_results");
907940

941+
let ctlz_then_block = self.current_func().new_block("ctlz_then");
942+
let ctlz_else_block = self.current_func().new_block("ctlz_else");
943+
let ctlz_after_block = self.current_func().new_block("ctlz_after")
944+
;
908945
let sixty_four = self.const_uint(arg_type, 64);
909946
let shift = self.lshr(arg, sixty_four);
910947
let high = self.gcc_int_cast(shift, self.u64_type);
911-
let low = self.gcc_int_cast(arg, self.u64_type);
912-
913-
let zero = self.context.new_rvalue_zero(self.usize_type);
914-
let one = self.context.new_rvalue_one(self.usize_type);
915-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
916948

917949
let clzll = self.context.get_builtin_function("__builtin_clzll");
918950

919-
let first_elem = self.context.new_array_access(None, result, zero);
920-
let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
921-
self.llbb()
922-
.add_assignment(self.location, first_elem, first_value);
923-
924-
let second_elem = self.context.new_array_access(self.location, result, one);
925-
let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type);
926-
let second_value = self.add(cast, sixty_four);
927-
self.llbb()
928-
.add_assignment(self.location, second_elem, second_value);
929-
930-
let third_elem = self.context.new_array_access(self.location, result, two);
931-
let third_value = self.const_uint(arg_type, 128);
932-
self.llbb()
933-
.add_assignment(self.location, third_elem, third_value);
951+
let zero_hi = self.const_uint(high.get_type(), 0);
952+
let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi);
953+
self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
954+
self.switch_to_block(ctlz_then_block);
934955

935-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
936-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
937-
let not_low_and_not_high = not_low & not_high;
938-
let index = not_high + not_low_and_not_high;
939-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
940-
// gcc.
941-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
942-
// compilation stage.
943-
let index = self.context.new_cast(self.location, index, self.i32_type);
956+
let result_128 =
957+
self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
944958

945-
let res = self.context.new_array_access(self.location, result, index);
959+
ctlz_then_block.add_assignment(self.location, result, result_128);
960+
ctlz_then_block.end_with_jump(self.location, ctlz_after_block);
946961

947-
return self.gcc_int_cast(res.to_rvalue(), result_type);
962+
self.switch_to_block(ctlz_else_block);
963+
let low = self.gcc_int_cast(arg, self.u64_type);
964+
let low_leading_zeroes =
965+
self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
966+
let sixty_four_result_type = self.const_uint(result_type, 64);
967+
let result_128 = self.add(low_leading_zeroes, sixty_four_result_type);
968+
ctlz_else_block.add_assignment(self.location, result, result_128);
969+
ctlz_else_block.end_with_jump(self.location, ctlz_after_block);
970+
self.switch_to_block(ctlz_after_block);
971+
return result.to_rvalue();
948972
}
949973
else {
950974
let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");

0 commit comments

Comments
 (0)