-
Notifications
You must be signed in to change notification settings - Fork 8
/
tsc.h
62 lines (58 loc) · 2.87 KB
/
tsc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// 2019, Georg Sauthoff <mail@gms.tf>
//
// SPDX-License-Identifier: GPL-3.0-or-later
// Read Time-Stamp Counter (fenced)
//
// Returns the current 64-bit time-stamp counter value. MFENCE;LFENCE are
// executed immediately before RDTSC:
// 'If software requires RDTSC to be executed only after all previous
// instructions have executed and all previous loads and stores are
// globally visible, it can execute the sequence MFENCE;LFENCE
// immediately before RDTSC.'
// https://www.felixcloutier.com/x86/rdtsc
//
// A similar effect can be had by executing CPUID before RDTSC, but CPUID
// additionally writes to EAX, EBX, ECX and EDX, cf.
// https://www.intel.de/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__))
fenced_rdtsc(void)
{
    uint32_t lo, hi;
    // __asm__/__volatile__ (instead of asm/volatile) are also accepted in
    // strict ISO modes such as -std=c11.
    // None of the instructions below takes an operand, so the text is valid
    // in both AT&T and Intel syntax and no .intel_syntax/.att_syntax
    // switching (which breaks builds using -masm=intel) is needed.
    __asm__ __volatile__ (
        "mfence \n\t"
        "lfence \n\t"
        "rdtsc  \n\t"            // counter into EDX:EAX
        : "=a" (lo), "=d" (hi)   // output operands: low half, high half
        :                        // input operands
        : "memory");             // compiler barrier: the fences only order
                                 // the CPU, the clobber keeps the compiler
                                 // from moving memory accesses across the read
    // combine the halves in C, so the compiler picks the registers and
    // tracks the flags itself
    return ((uint64_t) hi << 32) | lo;
}
// Read Time-Stamp Counter and Processor ID (fenced)
//
// Returns the current 64-bit time-stamp counter value; the processor id
// that RDTSCP delivers in ECX is discarded.
//
// 'The RDTSCP instruction is not a serializing instruction, but it does wait
// until all previous instructions have executed and all previous loads are
// globally visible.'
// 'If software requires RDTSCP to be executed prior to execution of
// any subsequent instruction (including any memory accesses), it can
// execute LFENCE immediately after RDTSCP.'
// https://www.felixcloutier.com/x86/rdtscp
//
// Alternatively, CPUID could be called after RDTSCP (clobbers more
// registers, though), cf.
// https://www.intel.de/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__))
fenced_rdtscp(void)
{
    uint32_t lo, hi, aux;
    // __asm__/__volatile__ (instead of asm/volatile) are also accepted in
    // strict ISO modes such as -std=c11.
    // None of the instructions below takes an operand, so the text is valid
    // in both AT&T and Intel syntax and no .intel_syntax/.att_syntax
    // switching (which breaks builds using -masm=intel) is needed.
    __asm__ __volatile__ (
        "rdtscp \n\t"            // counter into EDX:EAX, id into ECX
        "lfence \n\t"            // better than CPUID
        : "=a" (lo), "=d" (hi),  // output operands: low half, high half,
          "=c" (aux)             // and the id register that gets overwritten
        :                        // input operands
        : "memory");             // compiler barrier: the fence only orders
                                 // the CPU, the clobber keeps the compiler
                                 // from moving memory accesses across the read
    (void) aux;                  // processor id is intentionally unused
    // combine the halves in C, so the compiler picks the registers and
    // tracks the flags itself
    return ((uint64_t) hi << 32) | lo;
}