Skip to content

Commit

Permalink
winapi: Less assembly, good enough(tm)
Browse files Browse the repository at this point in the history
  • Loading branch information
GXTX committed Oct 24, 2023
1 parent eb4385b commit 2f6d75b
Showing 1 changed file with 19 additions and 38 deletions.
57 changes: 19 additions & 38 deletions lib/winapi/profiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

// SPDX-FileCopyrightText: 2019 Stefan Schmidt

#define USE_RDTSC_FOR_FREQ
#include <profileapi.h>
#ifdef USE_RDTSC_FOR_FREQ
#include <synchapi.h>
Expand All @@ -19,6 +20,7 @@ static void __attribute__((constructor)) PrimeQueryPerformanceFrequency ()
#define NV_PTIMER_NUM *(volatile ULONG *)0xFD009200
#define NV_PTIMER_DEN *(volatile ULONG *)0xFD009210
#define NV_PTIMER_COUNT 0xFD009400
#define ptr_PTIMER_COUNT *(volatile ULONG *)0xFD009400
#define ASM_LOOPS 1024 * 4

ULARGE_INTEGER rdtsc_count_1 = {{0, 0}}, rdtsc_count_2 = {{0, 0}};
Expand All @@ -33,60 +35,38 @@ static void __attribute__((constructor)) PrimeQueryPerformanceFrequency ()

KeEnterCriticalRegion();

// Turn off caches
__asm
{
push eax
push edx
push ecx

cli
sfence

// Turn off caches
mov eax, cr0
or eax, 1 << 30 // Set CD bit
mov cr0, eax
wbinvd

// Reset PTIMER
mov eax, [NV_PTIMER_COUNT]
and eax, ~(0xFFFFFFE0) // First 5 bits are not used
mov [NV_PTIMER_COUNT], eax

rdtsc
mov rdtsc_count_1.LowPart, eax
mov rdtsc_count_1.HighPart, edx

mov eax, [NV_PTIMER_COUNT]
mov ptimer_count_1, eax

// Spin for a bit
mov eax, ASM_LOOPS
loop_1:
dec eax
jnz loop_1

rdtsc
mov rdtsc_count_2.LowPart, eax
mov rdtsc_count_2.HighPart, edx

mov eax, [NV_PTIMER_COUNT]
mov ptimer_count_2, eax

// Without this, invalidating the cache below will crash the system
sfence
}

ptr_PTIMER_COUNT &= ~(0xFFFFFFE0); // First 5 bits are not used

rdtsc_count_1.QuadPart = __rdtsc();
ptimer_count_1 = ptr_PTIMER_COUNT;

KeStallExecutionProcessor(10);

rdtsc_count_2.QuadPart = __rdtsc();
ptimer_count_2 = ptr_PTIMER_COUNT;

__asm
{
sfence
mov eax, cr0
and eax, ~(1 << 30) // Clear CD bit
mov cr0, eax
wbinvd

sti

pop ecx
pop edx
pop eax
}

KeLeaveCriticalRegion();

double ptimer_diff = (ptimer_count_2 >> 5) - (ptimer_count_1 >> 5);
Expand Down Expand Up @@ -116,6 +96,7 @@ BOOL QueryPerformanceFrequency (LARGE_INTEGER *lpFrequency)
assert(lpFrequency != NULL);

#ifdef USE_RDTSC_FOR_FREQ
PrimeQueryPerformanceFrequency();
lpFrequency->QuadPart = frequency.QuadPart;
#else
lpFrequency->QuadPart = 733333333;
Expand Down

0 comments on commit 2f6d75b

Please sign in to comment.