@@ -525,47 +525,22 @@ mod hw {
525525 /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
526526 #[ inline( always) ]
527527 fn rdpmc ( reg_idx : u32 ) -> u64 {
// Reads the counter selected by `reg_idx` and returns it as a `u64`.
// NOTE(review): this is a diff view. Lines marked `-` are the previous body,
// which issued the inline asm directly; lines marked `+` are the replacement,
// which delegates to `serialize_instruction_execution` and `unserialized_rdpmc`.
528- let ( lo, hi) : ( u32 , u32 ) ;
529- unsafe {
530- // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
531- if cfg ! ( unserialized_rdpmc) && false {
532- // FIXME(eddyb) the Intel and AMD manuals warn about the need for
533- // "serializing instructions" before/after `rdpmc`, if avoiding any
534- // reordering is desired, but do not agree on the full set of usable
535- // "serializing instructions" (e.g. `mfence` isn't listed in both).
536- //
537- // The only usable, and guaranteed to work, "serializing instruction"
538- // appears to be `cpuid`, but it doesn't seem easy to use, especially
539- // due to the overlap in registers with `rdpmc` itself, and it might
540- // have too high of a cost, compared to serialization benefits (if any).
541- asm ! ( "rdpmc" , in( "ecx" ) reg_idx, out( "eax" ) lo, out( "edx" ) hi, options( nostack) ) ;
542- } else {
543- asm ! (
544- // Dummy `cpuid(0)` to serialize instruction execution.
545- "xor %eax, %eax" , // Intel syntax: "xor eax, eax"
546- "cpuid" ,
547-
548- "mov {rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {rdpmc_ecx:e}"
549- "rdpmc" ,
550- rdpmc_ecx = in( reg) reg_idx,
551- out( "eax" ) lo,
552- out( "edx" ) hi,
553-
554- // `cpuid` clobbers (not overwritten by `rdpmc`).
555- out( "ebx" ) _,
556- out( "ecx" ) _,
557-
558- options( nostack) ,
559-
560- // HACK(eddyb) LLVM 9 and older do not support modifiers
561- // in Intel syntax inline asm; whenever Rust minimum LLVM
562- // version becomes LLVM 10, remove and replace above
563- // instructions with Intel syntax version (from comments).
564- options( att_syntax) ,
565- ) ;
566- }
528+ // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
// The trailing `&& false` makes this condition constant-false, so the first
// branch never runs whatever the cfg says; every call takes the `else` branch.
529+ if cfg ! ( unserialized_rdpmc) && false {
530+ // FIXME(eddyb) the Intel and AMD manuals warn about the need for
531+ // "serializing instructions" before/after `rdpmc`, if avoiding any
532+ // reordering is desired, but do not agree on the full set of usable
533+ // "serializing instructions" (e.g. `mfence` isn't listed in both).
534+ //
535+ // The only usable, and guaranteed to work, "serializing instruction"
536+ // appears to be `cpuid`, but it doesn't seem easy to use, especially
537+ // due to the overlap in registers with `rdpmc` itself, and it might
538+ // have too high of a cost, compared to serialization benefits (if any).
539+ unserialized_rdpmc ( reg_idx)
540+ } else {
// Live path: issue the dummy `cpuid` first, then read the counter. The value
// of the `if`/`else` expression is the return value of the function.
541+ serialize_instruction_execution ( ) ;
542+ unserialized_rdpmc ( reg_idx)
567543 }
568- lo as u64 | ( hi as u64 ) << 32
569544 }
570545
571546 /// Read two hardware performance counters at once (see `rdpmc`).
@@ -574,44 +549,49 @@ mod hw {
574549 /// only requires one "serializing instruction", rather than two.
575550 #[ inline( always) ]
576551 fn rdpmc_pair ( a_reg_idx : u32 , b_reg_idx : u32 ) -> ( u64 , u64 ) {
577- let ( a_lo, a_hi) : ( u32 , u32 ) ;
578- let ( b_lo, b_hi) : ( u32 , u32 ) ;
// One dummy `cpuid` is issued up front and shared by both counter reads.
552+ serialize_instruction_execution ( ) ;
// Tuple fields are evaluated left to right, so the counter for `a_reg_idx` is
// read before the one for `b_reg_idx`; the result keeps the argument order.
553+ ( unserialized_rdpmc ( a_reg_idx) , unserialized_rdpmc ( b_reg_idx) )
554+ }
555+
556+ /// Dummy `cpuid(0)` to serialize instruction execution.
// Takes no arguments and returns nothing. Every register the asm writes is
// either restored afterwards (`rbx`) or declared as a discarded output.
// NOTE(review): the `unsafe` block has no `SAFETY:` rationale; presumably it
// relies on `cpuid` being usable from this context on every supported CPU -
// confirm.
// NOTE(review): this is a diff view; lines marked `-` belong to the previous
// combined `cpuid` + `rdpmc` asm block and are not part of this function.
557+ #[ inline( always) ]
558+ fn serialize_instruction_execution ( ) {
579559 unsafe {
580560 asm ! (
581- // Dummy `cpuid(0)` to serialize instruction execution.
582- "xor %eax, %eax" , // Intel syntax: "xor eax, eax"
// Zero `eax` so that the `cpuid` below is the `cpuid(0)` named in the doc comment.
561+ "xor eax, eax" ,
562+ // LLVM sometimes reserves `ebx` for its internal use, so we need to use
563+ // a scratch register for it instead.
// The full 64-bit `rbx` (which contains `ebx`) is copied to the scratch
// register before `cpuid` and copied back immediately after it.
564+ "mov {tmp_rbx:r}, rbx" ,
583565 "cpuid" ,
584-
585- "mov {a_rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {a_rdpmc_ecx:e}"
586- "rdpmc" ,
587- "mov %eax, {a_rdpmc_eax:e}" , // Intel syntax: "mov {a_rdpmc_eax:e}, eax"
588- "mov %edx, {a_rdpmc_edx:e}" , // Intel syntax: "mov {a_rdpmc_edx:e}, edx"
589- "mov {b_rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {b_rdpmc_ecx:e}"
590- "rdpmc" ,
591- a_rdpmc_ecx = in( reg) a_reg_idx,
592- a_rdpmc_eax = out( reg) a_lo,
593- a_rdpmc_edx = out( reg) a_hi,
594- b_rdpmc_ecx = in( reg) b_reg_idx,
595- out( "eax" ) b_lo,
596- out( "edx" ) b_hi,
597-
598- // `cpuid` clobbers (not overwritten by `rdpmc`).
599- out( "ebx" ) _,
600- out( "ecx" ) _,
566+ "mov rbx, {tmp_rbx:r}" ,
// Scratch register picked by the compiler; `_` discards its final value.
567+ tmp_rbx = lateout( reg) _,
568+ // `cpuid` clobbers.
// These three are listed as outputs bound to `_`, i.e. overwritten and ignored.
569+ lateout( "eax" ) _,
570+ lateout( "edx" ) _,
571+ lateout( "ecx" ) _,
601572
602573 options( nostack) ,
574+ ) ;
575+ }
576+ }
603577
604- // HACK(eddyb) LLVM 9 and older do not support modifiers
605- // in Intel syntax inline asm; whenever Rust minimum LLVM
606- // version becomes LLVM 10, remove and replace above
607- // instructions with Intel syntax version (from comments).
608- options( att_syntax) ,
578+ /// Read the hardware performance counter indicated by `reg_idx`.
579+ ///
580+ /// If the counter is signed, sign extension should be performed based on
581+ /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
// This helper issues only the `rdpmc` instruction, with no `cpuid` of its own;
// `rdpmc` and `rdpmc_pair` call `serialize_instruction_execution` before it.
// NOTE(review): this is a diff view; lines marked `-` are the tail of the
// previous `rdpmc_pair` body and are not part of this function.
582+ #[ inline( always) ]
583+ fn unserialized_rdpmc ( reg_idx : u32 ) -> u64 {
584+ let ( lo, hi) : ( u32 , u32 ) ;
585+ unsafe {
586+ asm ! (
587+ "rdpmc" ,
// The counter index is passed in `ecx`; the result comes back as two 32-bit
// halves, bound here to `lo` (from `eax`) and `hi` (from `edx`).
588+ in( "ecx" ) reg_idx,
589+ lateout( "eax" ) lo,
590+ lateout( "edx" ) hi,
591+ options( nostack)
609592 ) ;
610593 }
611- (
612- a_lo as u64 | ( a_hi as u64 ) << 32 ,
613- b_lo as u64 | ( b_hi as u64 ) << 32 ,
614- )
// Recombine the halves: `hi` supplies bits 32..64 and `lo` bits 0..32
// (`<<` binds tighter than `|`, so the shift applies to `hi` only).
594+ lo as u64 | ( hi as u64 ) << 32
615595 }
616596
617597 /// Categorization of `x86_64` CPUs, primarily based on how they
@@ -815,17 +795,10 @@ mod hw {
815795 let mut _tmp: u64 = 0 ;
816796 unsafe {
817797 asm ! (
818- // Intel syntax: "lock xadd [{atomic}], {tmp}"
819- "lock xadd {tmp}, ({atomic})" ,
798+ "lock xadd qword ptr [{atomic}], {tmp}" ,
820799
821800 atomic = in( reg) & mut atomic,
822801 tmp = inout( reg) _tmp,
823-
824- // HACK(eddyb) LLVM 9 and older do not support modifiers
825- // in Intel syntax inline asm; whenever Rust minimum LLVM
826- // version becomes LLVM 10, remove and replace above
827- // instructions with Intel syntax version (from comments).
828- options( att_syntax) ,
829802 ) ;
830803 }
831804
0 commit comments