@@ -405,7 +405,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
405405 | sym:: saturating_sub => {
406406 match int_type_width_signed ( args[ 0 ] . layout . ty , self ) {
407407 Some ( ( width, signed) ) => match name {
408- sym:: ctlz | sym:: cttz => {
408+ sym:: ctlz => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
409+
410+ sym:: cttz => {
409411 let func = self . current_func ( ) ;
410412 let then_block = func. new_block ( "then" ) ;
411413 let else_block = func. new_block ( "else" ) ;
@@ -426,11 +428,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
426428 // in the state need to be updated.
427429 self . switch_to_block ( else_block) ;
428430
429- let zeros = match name {
430- sym:: ctlz => self . count_leading_zeroes ( width, arg) ,
431- sym:: cttz => self . count_trailing_zeroes ( width, arg) ,
432- _ => unreachable ! ( ) ,
433- } ;
431+ let zeros = self . count_trailing_zeroes ( width, arg) ;
434432 self . llbb ( ) . add_assignment ( None , result, zeros) ;
435433 self . llbb ( ) . end_with_jump ( None , after_block) ;
436434
@@ -440,7 +438,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
440438
441439 result. to_rvalue ( )
442440 }
443- sym:: ctlz_nonzero => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
441+ sym:: ctlz_nonzero => {
442+ self . count_leading_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
443+ }
444444 sym:: cttz_nonzero => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
445445 sym:: ctpop => self . pop_count ( args[ 0 ] . immediate ( ) ) ,
446446 sym:: bswap => {
@@ -877,16 +877,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
877877 }
878878
879879 fn count_leading_zeroes ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
880+ // if arg is 0, early return 0, else call count_leading_zeroes_nonzero to compute leading zeros
881+ let func = self . current_func ( ) ;
882+ let then_block = func. new_block ( "then" ) ;
883+ let else_block = func. new_block ( "else" ) ;
884+ let after_block = func. new_block ( "after" ) ;
885+
886+ let result = func. new_local ( None , self . u32_type , "zeros" ) ;
887+ let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
888+ let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
889+ self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
890+
891+ let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
892+ then_block. add_assignment ( None , result, zero_result) ;
893+ then_block. end_with_jump ( None , after_block) ;
894+
895+ // NOTE: since jumps were added in a place count_leading_zeroes_nonzero() does not expect,
896+ // the current block in the state need to be updated.
897+ self . switch_to_block ( else_block) ;
898+
899+ let zeros = self . count_leading_zeroes_nonzero ( width, arg) ;
900+ self . llbb ( ) . add_assignment ( None , result, zeros) ;
901+ self . llbb ( ) . end_with_jump ( None , after_block) ;
902+
903+ // NOTE: since jumps were added in a place rustc does not
904+ // expect, the current block in the state need to be updated.
905+ self . switch_to_block ( after_block) ;
906+
907+ result. to_rvalue ( )
908+ }
909+
910+ fn count_leading_zeroes_nonzero ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
880911 // TODO(antoyo): use width?
881- let arg_type = arg. get_type ( ) ;
882912 let result_type = self . u32_type ;
913+ let mut arg_type = arg. get_type ( ) ;
883914 let arg = if arg_type. is_signed ( self . cx ) {
884- let new_type = arg_type. to_unsigned ( self . cx ) ;
885- self . gcc_int_cast ( arg, new_type )
915+ arg_type = arg_type. to_unsigned ( self . cx ) ;
916+ self . gcc_int_cast ( arg, arg_type )
886917 } else {
887918 arg
888919 } ;
889- let arg_type = arg. get_type ( ) ;
890920 let count_leading_zeroes =
891921 // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
892922 // instead of using is_uint().
@@ -900,51 +930,45 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
900930 "__builtin_clzll"
901931 }
902932 else if width == 128 {
903- // Algorithm from: https://stackoverflow.com/a/28433850/389119
904- let array_type = self . context . new_array_type ( None , arg_type, 3 ) ;
933+ // arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0
934+ // __buildin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
935+ // else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0
936+ // because arg is not 0.
937+
905938 let result = self . current_func ( )
906- . new_local ( None , array_type , "count_loading_zeroes_results " ) ;
939+ . new_local ( None , result_type , "count_leading_zeroes_results " ) ;
907940
941+ let ctlz_then_block = self . current_func ( ) . new_block ( "ctlz_then" ) ;
942+ let ctlz_else_block = self . current_func ( ) . new_block ( "ctlz_else" ) ;
943+ let ctlz_after_block = self . current_func ( ) . new_block ( "ctlz_after" )
944+ ;
908945 let sixty_four = self . const_uint ( arg_type, 64 ) ;
909946 let shift = self . lshr ( arg, sixty_four) ;
910947 let high = self . gcc_int_cast ( shift, self . u64_type ) ;
911- let low = self . gcc_int_cast ( arg, self . u64_type ) ;
912-
913- let zero = self . context . new_rvalue_zero ( self . usize_type ) ;
914- let one = self . context . new_rvalue_one ( self . usize_type ) ;
915- let two = self . context . new_rvalue_from_long ( self . usize_type , 2 ) ;
916948
917949 let clzll = self . context . get_builtin_function ( "__builtin_clzll" ) ;
918950
919- let first_elem = self . context . new_array_access ( None , result, zero) ;
920- let first_value = self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ high] ) , arg_type) ;
921- self . llbb ( )
922- . add_assignment ( self . location , first_elem, first_value) ;
923-
924- let second_elem = self . context . new_array_access ( self . location , result, one) ;
925- let cast = self . gcc_int_cast ( self . context . new_call ( self . location , clzll, & [ low] ) , arg_type) ;
926- let second_value = self . add ( cast, sixty_four) ;
927- self . llbb ( )
928- . add_assignment ( self . location , second_elem, second_value) ;
929-
930- let third_elem = self . context . new_array_access ( self . location , result, two) ;
931- let third_value = self . const_uint ( arg_type, 128 ) ;
932- self . llbb ( )
933- . add_assignment ( self . location , third_elem, third_value) ;
951+ let zero_hi = self . const_uint ( high. get_type ( ) , 0 ) ;
952+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , high, zero_hi) ;
953+ self . llbb ( ) . end_with_conditional ( self . location , cond, ctlz_then_block, ctlz_else_block) ;
954+ self . switch_to_block ( ctlz_then_block) ;
934955
935- let not_high = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , high) ;
936- let not_low = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , low) ;
937- let not_low_and_not_high = not_low & not_high;
938- let index = not_high + not_low_and_not_high;
939- // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
940- // gcc.
941- // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
942- // compilation stage.
943- let index = self . context . new_cast ( self . location , index, self . i32_type ) ;
956+ let result_128 =
957+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ high] ) , result_type) ;
944958
945- let res = self . context . new_array_access ( self . location , result, index) ;
959+ ctlz_then_block. add_assignment ( self . location , result, result_128) ;
960+ ctlz_then_block. end_with_jump ( self . location , ctlz_after_block) ;
946961
947- return self . gcc_int_cast ( res. to_rvalue ( ) , result_type) ;
962+ self . switch_to_block ( ctlz_else_block) ;
963+ let low = self . gcc_int_cast ( arg, self . u64_type ) ;
964+ let low_leading_zeroes =
965+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ low] ) , result_type) ;
966+ let sixty_four_result_type = self . const_uint ( result_type, 64 ) ;
967+ let result_128 = self . add ( low_leading_zeroes, sixty_four_result_type) ;
968+ ctlz_else_block. add_assignment ( self . location , result, result_128) ;
969+ ctlz_else_block. end_with_jump ( self . location , ctlz_after_block) ;
970+ self . switch_to_block ( ctlz_after_block) ;
971+ return result. to_rvalue ( ) ;
948972 }
949973 else {
950974 let count_leading_zeroes = self . context . get_builtin_function ( "__builtin_clzll" ) ;
0 commit comments