@@ -525,45 +525,22 @@ mod hw {
525525 /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
526526 #[ inline( always) ]
527527 fn rdpmc ( reg_idx : u32 ) -> u64 {
528- let ( lo, hi) : ( u32 , u32 ) ;
529- unsafe {
530- // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
531- if cfg ! ( unserialized_rdpmc) && false {
532- // FIXME(eddyb) the Intel and AMD manuals warn about the need for
533- // "serializing instructions" before/after `rdpmc`, if avoiding any
534- // reordering is desired, but do not agree on the full set of usable
535- // "serializing instructions" (e.g. `mfence` isn't listed in both).
536- //
537- // The only usable, and guaranteed to work, "serializing instruction"
538- // appears to be `cpuid`, but it doesn't seem easy to use, especially
539- // due to the overlap in registers with `rdpmc` itself, and it might
540- // have too high of a cost, compared to serialization benefits (if any).
541- asm ! ( "rdpmc" , in( "ecx" ) reg_idx, out( "eax" ) lo, out( "edx" ) hi, options( nostack) ) ;
542- } else {
543- asm ! (
544- // Dummy `cpuid(0)` to serialize instruction execution.
545- "xor eax, eax" ,
546- // LLVM sometimes reserves `ebx` for its internal use, we so we need to use
547- // a scratch register for it instead.
548- "mov {tmp_rbx:r}, rbx" ,
549- "cpuid" ,
550- "mov rbx, {tmp_rbx:r}" ,
551-
552- "mov ecx, {rdpmc_ecx:e}" ,
553- "rdpmc" ,
554- rdpmc_ecx = in( reg) reg_idx,
555- tmp_rbx = out( reg) _,
556- out( "eax" ) lo,
557- out( "edx" ) hi,
558-
559- // `cpuid` clobbers (not overwritten by `rdpmc`).
560- out( "ecx" ) _,
561-
562- options( nostack) ,
563- ) ;
564- }
528+ // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
529+ if cfg ! ( unserialized_rdpmc) && false {
530+ // FIXME(eddyb) the Intel and AMD manuals warn about the need for
531+ // "serializing instructions" before/after `rdpmc`, if avoiding any
532+ // reordering is desired, but do not agree on the full set of usable
533+ // "serializing instructions" (e.g. `mfence` isn't listed in both).
534+ //
535+ // The only usable, and guaranteed to work, "serializing instruction"
536+ // appears to be `cpuid`, but it doesn't seem easy to use, especially
537+ // due to the overlap in registers with `rdpmc` itself, and it might
538+ // have too high of a cost, compared to serialization benefits (if any).
539+ unserialized_rdpmc ( reg_idx)
540+ } else {
541+ serialize_instruction_execution ( ) ;
542+ unserialized_rdpmc ( reg_idx)
565543 }
566- lo as u64 | ( hi as u64 ) << 32
567544 }
568545
569546 /// Read two hardware performance counters at once (see `rdpmc`).
@@ -572,42 +549,49 @@ mod hw {
572549 /// only requires one "serializing instruction", rather than two.
573550 #[ inline( always) ]
574551 fn rdpmc_pair ( a_reg_idx : u32 , b_reg_idx : u32 ) -> ( u64 , u64 ) {
575- let ( a_lo, a_hi) : ( u32 , u32 ) ;
576- let ( b_lo, b_hi) : ( u32 , u32 ) ;
552+ serialize_instruction_execution ( ) ;
553+ ( unserialized_rdpmc ( a_reg_idx) , unserialized_rdpmc ( b_reg_idx) )
554+ }
555+
556+ /// Dummy `cpuid(0)` to serialize instruction execution.
557+ #[ inline( always) ]
558+ fn serialize_instruction_execution ( ) {
577559 unsafe {
578560 asm ! (
579- // Dummy `cpuid(0)` to serialize instruction execution.
580561 "xor eax, eax" ,
581- // LLVM sometimes reserves `ebx` for its internal use, we so we need to use
562+ // LLVM sometimes reserves `ebx` for its internal use, so we need to use
582563 // a scratch register for it instead.
583564 "mov {tmp_rbx:r}, rbx" ,
584565 "cpuid" ,
585566 "mov rbx, {tmp_rbx:r}" ,
586-
587- "mov ecx, {a_rdpmc_ecx:e}" ,
588- "rdpmc" ,
589- "mov {a_rdpmc_eax:e}, eax" ,
590- "mov {a_rdpmc_edx:e}, edx" ,
591- "mov ecx, {b_rdpmc_ecx:e}" ,
592- "rdpmc" ,
593- a_rdpmc_ecx = in( reg) a_reg_idx,
594- a_rdpmc_eax = out( reg) a_lo,
595- a_rdpmc_edx = out( reg) a_hi,
596- b_rdpmc_ecx = in( reg) b_reg_idx,
597- tmp_rbx = out( reg) _,
598- out( "eax" ) b_lo,
599- out( "edx" ) b_hi,
600-
601- // `cpuid` clobbers (not overwritten by `rdpmc`).
602- out( "ecx" ) _,
567+ tmp_rbx = lateout( reg) _,
568+ // `cpuid` clobbers.
569+ lateout( "eax" ) _,
570+ lateout( "edx" ) _,
571+ lateout( "ecx" ) _,
603572
604573 options( nostack) ,
605574 ) ;
606575 }
607- (
608- a_lo as u64 | ( a_hi as u64 ) << 32 ,
609- b_lo as u64 | ( b_hi as u64 ) << 32 ,
610- )
576+ }
577+
578+ /// Read the hardware performance counter indicated by `reg_idx`, without serializing instruction execution first.
579+ ///
580+ /// If the counter is signed, sign extension should be performed based on
581+ /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
582+ #[ inline( always) ]
583+ fn unserialized_rdpmc ( reg_idx : u32 ) -> u64 {
584+ let ( lo, hi) : ( u32 , u32 ) ;
585+ unsafe {
586+ asm ! (
587+ "rdpmc" ,
588+ in( "ecx" ) reg_idx,
589+ lateout( "eax" ) lo,
590+ lateout( "edx" ) hi,
591+ options( nostack)
592+ ) ;
593+ }
594+ lo as u64 | ( hi as u64 ) << 32
611595 }
612596
613597 /// Categorization of `x86_64` CPUs, primarily based on how they
0 commit comments