@@ -4,7 +4,7 @@ mod simd;
44#[ cfg( feature="master" ) ]
55use std:: iter;
66
7- use gccjit:: { ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
7+ use gccjit:: { BinaryOp , ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
88use rustc_codegen_ssa:: MemFlags ;
99use rustc_codegen_ssa:: base:: wants_msvc_seh;
1010use rustc_codegen_ssa:: common:: IntPredicate ;
@@ -820,74 +820,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
820820 } ;
821821
822822 if value_type. is_u128 ( & self . cx ) {
823- // TODO(antoyo): implement in the normal algorithm below to have a more efficient
824- // implementation (that does not require a call to __popcountdi2).
825- let popcount = self . context . get_builtin_function ( "__builtin_popcountll" ) ;
826823 let sixty_four = self . gcc_int ( value_type, 64 ) ;
827824 let right_shift = self . gcc_lshr ( value, sixty_four) ;
828825 let high = self . gcc_int_cast ( right_shift, self . cx . ulonglong_type ) ;
829- let high = self . context . new_call ( None , popcount , & [ high] ) ;
826+ let high = self . pop_count ( high) ;
830827 let low = self . gcc_int_cast ( value, self . cx . ulonglong_type ) ;
831- let low = self . context . new_call ( None , popcount , & [ low] ) ;
828+ let low = self . pop_count ( low) ;
832829 let res = high + low;
833830 return self . gcc_int_cast ( res, result_type) ;
834831 }
835832
836- // First step.
837- let mask = self . context . new_rvalue_from_long ( value_type, 0x5555555555555555 ) ;
838- let left = value & mask;
839- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 1 ) ;
840- let right = shifted & mask;
841- let value = left + right;
842-
843- // Second step.
844- let mask = self . context . new_rvalue_from_long ( value_type, 0x3333333333333333 ) ;
845- let left = value & mask;
846- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 2 ) ;
847- let right = shifted & mask;
848- let value = left + right;
849-
850- // Third step.
851- let mask = self . context . new_rvalue_from_long ( value_type, 0x0F0F0F0F0F0F0F0F ) ;
852- let left = value & mask;
853- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 4 ) ;
854- let right = shifted & mask;
855- let value = left + right;
856-
857- if value_type. is_u8 ( & self . cx ) {
858- return self . context . new_cast ( None , value, result_type) ;
859- }
860-
861- // Fourth step.
862- let mask = self . context . new_rvalue_from_long ( value_type, 0x00FF00FF00FF00FF ) ;
863- let left = value & mask;
864- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 8 ) ;
865- let right = shifted & mask;
866- let value = left + right;
867-
868- if value_type. is_u16 ( & self . cx ) {
869- return self . context . new_cast ( None , value, result_type) ;
870- }
871-
872- // Fifth step.
873- let mask = self . context . new_rvalue_from_long ( value_type, 0x0000FFFF0000FFFF ) ;
874- let left = value & mask;
875- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 16 ) ;
876- let right = shifted & mask;
877- let value = left + right;
878-
879- if value_type. is_u32 ( & self . cx ) {
880- return self . context . new_cast ( None , value, result_type) ;
881- }
882-
883- // Sixth step.
884- let mask = self . context . new_rvalue_from_long ( value_type, 0x00000000FFFFFFFF ) ;
885- let left = value & mask;
886- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 32 ) ;
887- let right = shifted & mask;
888- let value = left + right;
889-
890- self . context . new_cast ( None , value, result_type)
833+ // Use Wegner's algorithm for population count, as GCC seems to play better with it:
834+ // for (int counter = 0; value != 0; counter++) {
835+ // value &= value - 1;
836+ // }
837+ let func = self . current_func . borrow ( ) . expect ( "func" ) ;
838+ let loop_head = func. new_block ( "head" ) ;
839+ let loop_body = func. new_block ( "body" ) ;
840+ let loop_tail = func. new_block ( "tail" ) ;
841+
842+ let counter_type = self . int_type ;
843+ let counter = self . current_func ( ) . new_local ( None , counter_type, "popcount_counter" ) ;
844+ let val = self . current_func ( ) . new_local ( None , value_type, "popcount_value" ) ;
845+ let zero = self . context . new_rvalue_zero ( counter_type) ;
846+ self . llbb ( ) . add_assignment ( None , counter, zero) ;
847+ self . llbb ( ) . add_assignment ( None , val, value) ;
848+ self . br ( loop_head) ;
849+
850+ // check if value isn't zero
851+ self . switch_to_block ( loop_head) ;
852+ let zero = self . context . new_rvalue_zero ( value_type) ;
853+ let cond = self . context . new_comparison ( None , ComparisonOp :: NotEquals , val. to_rvalue ( ) , zero) ;
854+ self . cond_br ( cond, loop_body, loop_tail) ;
855+
856+ // val &= val - 1;
857+ self . switch_to_block ( loop_body) ;
858+ let sub = val. to_rvalue ( ) - self . context . new_rvalue_one ( value_type) ;
859+ loop_body. add_assignment_op ( None , val, BinaryOp :: BitwiseAnd , sub) ;
860+
861+ // counter += 1
862+ let one = self . context . new_rvalue_one ( counter_type) ;
863+ loop_body. add_assignment_op ( None , counter, BinaryOp :: Plus , one) ;
864+ self . br ( loop_head) ;
865+
866+ // end of loop
867+ self . switch_to_block ( loop_tail) ;
868+ self . context . new_cast ( None , counter. to_rvalue ( ) , result_type)
891869 }
892870
893871 // Algorithm from: https://blog.regehr.org/archives/1063
0 commit comments