Skip to content

Commit

Permalink
Adding support for GPU counters in UE4 plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
bombomby committed Jan 30, 2022
1 parent 2ee1888 commit c2ec292
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 272 deletions.
4 changes: 2 additions & 2 deletions gui/Optick/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,5 @@
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.3.2.0")]
[assembly: AssemblyFileVersion("1.3.2.0")]
[assembly: AssemblyVersion("1.3.3.0")]
[assembly: AssemblyFileVersion("1.3.3.0")]
4 changes: 0 additions & 4 deletions samples/UnrealEnginePlugin/Source/OptickPlugin.Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,12 @@ public OptickPlugin(ReadOnlyTargetRules Target) : base(Target)
}
);

/*
#if UE_4_24_OR_LATER
PublicDefinitions.AddRange(
new string[]
{
"OPTICK_UE4_GPU=1",
}
);
#endif
*/

if (Target.bBuildEditor == true)
{
Expand Down
262 changes: 133 additions & 129 deletions samples/UnrealEnginePlugin/Source/Private/OptickPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "CoreMinimal.h"
#include "Containers/Ticker.h"
#include "GenericPlatform/GenericPlatformFile.h"
#include "HAL/X.h"
#include "HAL/PlatformFilemanager.h"
#include "HAL/PlatformProcess.h"
#include "Misc/EngineVersion.h"
#include "Misc/CoreDelegates.h"
Expand Down Expand Up @@ -112,10 +112,12 @@ class FOptickPlugin : public IOptickPlugin
uint64 Convert32bitCPUTimestamp(int64 timestamp) const;

#ifdef OPTICK_UE4_GPU

void OnEndFrameRT();
uint64 ConvertGPUTimestamp(uint64 timestamp);
uint64 ConvertGPUTimestamp(uint64 timestamp, int GPUIndex);

FGPUTimingCalibrationTimestamp CalibrationTimestamp;
bool UpdateCalibrationTimestamp(FRealtimeGPUProfilerFrameImpl* Frame, int GPUIndex);
FGPUTimingCalibrationTimestamp CalibrationTimestamps[MAX_NUM_GPUS];
#endif

public:
Expand Down Expand Up @@ -294,8 +296,6 @@ void FOptickPlugin::StartCapture()
if (!IsCapturing)
{
#ifdef OPTICK_UE4_GPU
CalibrationTimestamp = FGPUTiming::GetCalibrationTimestamp();

GPUThreadStorage.Reset();
for (auto& pair : StorageMap)
pair.Value->Reset();
Expand Down Expand Up @@ -393,7 +393,57 @@ uint64 FOptickPlugin::Convert32bitCPUTimestamp(int64 timestamp) const
}

#ifdef OPTICK_UE4_GPU
#if UE_4_24_OR_LATER

// Refreshes the CPU/GPU calibration pair stored in CalibrationTimestamps[GPUIndex] from the given profiler frame.
//
// Sources are tried in this order:
//   1. The frame's TimestampCalibrationQuery, when it is valid.
//   2. If either half of the pair is still zero and the frame holds more than one event:
//      the GPU start time of event [1] paired with the frame's CPU start timestamp.
//   3. Otherwise FGPUTiming::GetCalibrationTimestamp().
//
// @param Frame     Profiler frame to read from. It is dereferenced unconditionally, so it must not be null.
// @param GPUIndex  Slot of CalibrationTimestamps to update; also selects which GPU's results are read.
// @return true when both halves of the stored pair are non-zero; false otherwise,
//         including when GPUIndex is outside the calibration table.
bool FOptickPlugin::UpdateCalibrationTimestamp(FRealtimeGPUProfilerFrameImpl* Frame, int GPUIndex)
{
    // Reject indices outside the calibration table instead of touching memory past it
    const int NumCalibrationSlots = static_cast<int>(sizeof(CalibrationTimestamps) / sizeof(CalibrationTimestamps[0]));
    if (GPUIndex < 0 || GPUIndex >= NumCalibrationSlots)
        return false;

    // Start from a cleared pair so a stale calibration from an earlier frame is never reused
    FGPUTimingCalibrationTimestamp& CalibrationTimestamp = CalibrationTimestamps[GPUIndex];
    CalibrationTimestamp = FGPUTimingCalibrationTimestamp{ 0, 0 };

    if (Frame->TimestampCalibrationQuery.IsValid())
    {
#if UE_4_27_OR_LATER
        CalibrationTimestamp.GPUMicroseconds = Frame->TimestampCalibrationQuery->GPUMicroseconds[GPUIndex];
        CalibrationTimestamp.CPUMicroseconds = Frame->TimestampCalibrationQuery->CPUMicroseconds[GPUIndex];
#else
        CalibrationTimestamp.GPUMicroseconds = Frame->TimestampCalibrationQuery->GPUMicroseconds;
        CalibrationTimestamp.CPUMicroseconds = Frame->TimestampCalibrationQuery->CPUMicroseconds;
#endif
    }

    if (CalibrationTimestamp.GPUMicroseconds == 0 || CalibrationTimestamp.CPUMicroseconds == 0) // Unimplemented platforms, or invalid on the first frame
    {
        if (Frame->GpuProfilerEvents.Num() > 1)
        {
            // Align CPU and GPU frames.
            // The GPU half is read for the GPU being calibrated (previously hard-coded to GPU 0),
            // matching how OnEndFrameRT reads the event timestamps.
            CalibrationTimestamp.GPUMicroseconds = Frame->GpuProfilerEvents[1].GetStartResultMicroseconds(GPUIndex);
            CalibrationTimestamp.CPUMicroseconds = FPlatformTime::ToSeconds64(Frame->CPUFrameStartTimestamp) * 1000 * 1000;
        }
        else
        {
            // Fallback to legacy
            CalibrationTimestamp = FGPUTiming::GetCalibrationTimestamp();
        }
    }

    // A zero in either half means no usable calibration could be obtained for this frame
    return CalibrationTimestamp.GPUMicroseconds != 0 && CalibrationTimestamp.CPUMicroseconds != 0;
}

// Pair of timestamps describing the span [Start, Finish] of a single event.
// A default-constructed range is {0, 0}, which IsValid() reports as invalid.
struct TimeRange
{
    uint64 Start = 0;
    uint64 Finish = 0;

    TimeRange() = default;
    TimeRange(uint64 start, uint64 finish) : Start(start), Finish(finish) {}

    // True when the two ranges share at least one point; ranges that only touch
    // at an endpoint count as overlapping.
    bool IsOverlap(TimeRange other) const
    {
        const bool startsBeforeOtherEnds = Start <= other.Finish;
        const bool otherStartsBeforeEnd = other.Start <= Finish;
        return startsBeforeOtherEnds && otherStartsBeforeEnd;
    }

    // True when both endpoints are non-zero and Finish lies strictly after Start.
    bool IsValid() const
    {
        if (Start == 0 || Finish == 0)
            return false;
        return Start < Finish;
    }
};

void FOptickPlugin::OnEndFrameRT()
{
FScopeLock ScopeLock(&UpdateCriticalSection);
Expand All @@ -418,6 +468,7 @@ void FOptickPlugin::OnEndFrameRT()

if (!Event.GatherQueryResults(RHICmdList))
{

#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
UE_LOG(OptickLog, Warning, TEXT("Query is not ready."));
#endif
Expand All @@ -426,162 +477,106 @@ void FOptickPlugin::OnEndFrameRT()
}
}

for (int32 i = 1; i < NumEventsThisFramePlusOne; ++i)
{
FRealtimeGPUProfilerEventImpl& Event = Frame->GpuProfilerEvents[i];

const FName Name = Event.Name;

Optick::EventDescription* Description = nullptr;

if (Optick::EventDescription** ppDescription = GPUDescriptionMap.Find(Name))
{
Description = *ppDescription;
}
else
{
Description = Optick::EventDescription::CreateShared(TCHAR_TO_ANSI(*Name.ToString()));
GPUDescriptionMap.Add(Name, Description);
}

uint64 startTimestamp = ConvertGPUTimestamp(Event.StartResultMicroseconds);
uint64 endTimestamp = ConvertGPUTimestamp(Event.EndResultMicroseconds);
TArray<TimeRange> EventStack;

if (Name == NAME_GPU_Unaccounted)
{
OPTICK_FRAME_FLIP(Optick::FrameType::GPU, startTimestamp);
if (NumEventsThisFramePlusOne <= 1)
return;

if (GPUThreadStorage.LastTimestamp != 0)
{
OPTICK_STORAGE_POP(GPUThreadStorage.EventStorage, GPUThreadStorage.LastTimestamp);
}

OPTICK_STORAGE_PUSH(GPUThreadStorage.EventStorage, Optick::GetFrameDescription(Optick::FrameType::GPU), startTimestamp)
}
else
{
OPTICK_STORAGE_EVENT(GPUThreadStorage.EventStorage, Description, startTimestamp, endTimestamp);
}
GPUThreadStorage.LastTimestamp = FMath::Max<uint64>(GPUThreadStorage.LastTimestamp, endTimestamp);
}
}
}
}
#else
void FOptickPlugin::OnEndFrameRT()
{
FScopeLock ScopeLock(&UpdateCriticalSection);
// VS TODO: Add MGPU support
uint32 GPUIndex = 0;

if (!IsCapturing || !Optick::IsActive(Optick::Mode::GPU))
return;
// Can't collect GPU data without valid calibration between CPU and GPU timestamps
if (!UpdateCalibrationTimestamp(Frame, GPUIndex))
return;

QUICK_SCOPE_CYCLE_COUNTER(STAT_FOptickPlugin_UpdRT);
uint64 lastTimeStamp = FMath::Max(CalibrationTimestamps[GPUIndex].CPUMicroseconds, GPUThreadStorage.LastTimestamp);

if (FRealtimeGPUProfilerImpl* gpuProfiler = reinterpret_cast<FRealtimeGPUProfilerImpl*>(FRealtimeGPUProfiler::Get()))
{
if (FRealtimeGPUProfilerFrameImpl* Frame = gpuProfiler->Frames[gpuProfiler->ReadBufferIndex])
{
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
const FRealtimeGPUProfilerEventImpl& FirstEvent = Frame->GpuProfilerEvents[1];
uint64 frameStartTimestamp = FMath::Max(ConvertGPUTimestamp(FirstEvent.GetStartResultMicroseconds(GPUIndex), GPUIndex), lastTimeStamp);

bool bAnyEventFailed = false;
bool bAllQueriesAllocated = true;
OPTICK_FRAME_FLIP(Optick::FrameType::GPU, frameStartTimestamp);
OPTICK_STORAGE_PUSH(GPUThreadStorage.EventStorage, Optick::GetFrameDescription(Optick::FrameType::GPU), frameStartTimestamp)

for (int i = 0; i < Frame->GpuProfilerEvents.Num(); ++i)
for (int32 Idx = 1; Idx < NumEventsThisFramePlusOne; ++Idx)
{
FRealtimeGPUProfilerEventImpl* Event = Frame->GpuProfilerEvents[i];
check(Event != nullptr);
const FRealtimeGPUProfilerEventImpl& Event = Frame->GpuProfilerEvents[Idx];

if (!Event->HasValidResult())
if (Event.GetGPUMask().Contains(GPUIndex))
{
Event->GatherQueryResults(RHICmdList);
}
const FName Name = Event.Name;

if (!Event->HasValidResult())
{
#if UE_BUILD_DEBUG
UE_LOG(OptickLog, Warning, TEXT("Query '%s' not ready."), *Event->GetName().ToString());
#endif
// The frame isn't ready yet. Don't update stats - we'll try again next frame.
bAnyEventFailed = true;
continue;
}
if (Name == NAME_GPU_Unaccounted)
continue;

if (!Event->HasQueriesAllocated())
{
bAllQueriesAllocated = false;
}
}

if (bAnyEventFailed)
{
return;
}
Optick::EventDescription* Description = nullptr;

if (!bAllQueriesAllocated)
{
static bool bWarned = false;
if (Optick::EventDescription** ppDescription = GPUDescriptionMap.Find(Name))
{
Description = *ppDescription;
}
else
{
Description = Optick::EventDescription::CreateShared(TCHAR_TO_ANSI(*Name.ToString()));
GPUDescriptionMap.Add(Name, Description);
}

if (!bWarned)
{
bWarned = true;
UE_LOG(OptickLog, Warning, TEXT("Ran out of GPU queries! Results for this frame will be incomplete"));
}
}
uint64 startTimestamp = ConvertGPUTimestamp(Event.GetStartResultMicroseconds(GPUIndex), GPUIndex);
uint64 endTimestamp = ConvertGPUTimestamp(Event.GetEndResultMicroseconds(GPUIndex), GPUIndex);

// Fixing potential errors
startTimestamp = FMath::Max(startTimestamp, lastTimeStamp);
endTimestamp = FMath::Max(endTimestamp, startTimestamp);

for (int i = 0; i < Frame->GpuProfilerEvents.Num(); ++i)
{
FRealtimeGPUProfilerEventImpl* Event = Frame->GpuProfilerEvents[i];
check(Event != nullptr);
// Ensuring correct hierarchy
while (EventStack.Num() && (EventStack.Last().Finish <= startTimestamp))
EventStack.Pop();

const FName Name = Event->Name;
// Discovered broken hierarchy, skipping event
if (EventStack.Num() && (endTimestamp < EventStack.Last().Start))
continue;

Optick::EventDescription* Description = nullptr;
// Clamp range against the parent counter
if (EventStack.Num())
{
TimeRange parent = EventStack.Last();
startTimestamp = FMath::Clamp(startTimestamp, parent.Start, parent.Finish);
endTimestamp = FMath::Clamp(endTimestamp, parent.Start, parent.Finish);
}

if (Optick::EventDescription** ppDescription = GPUDescriptionMap.Find(Name))
{
Description = *ppDescription;
}
else
{
Description = Optick::EventDescription::CreateShared(TCHAR_TO_ANSI(*Name.ToString()));
GPUDescriptionMap.Add(Name, Description);
}
// Ignore invalid events
if (startTimestamp == endTimestamp)
continue;

uint64 startTimestamp = ConvertGPUTimestamp(Event->StartResultMicroseconds);
uint64 endTimestamp = ConvertGPUTimestamp(Event->EndResultMicroseconds);
//if (Name == NAME_GPU_Unaccounted)
//{
// OPTICK_FRAME_FLIP(Optick::FrameType::GPU, startTimestamp);

if (Name == NAME_GPU_Unaccounted)
{
OPTICK_FRAME_FLIP(Optick::FrameType::GPU, startTimestamp);

if (GPUThreadStorage.LastTimestamp != 0)
// OPTICK_STORAGE_PUSH(GPUThreadStorage.EventStorage, Optick::GetFrameDescription(Optick::FrameType::GPU), startTimestamp)
//}
//else
{
OPTICK_STORAGE_POP(GPUThreadStorage.EventStorage, GPUThreadStorage.LastTimestamp);
EventStack.Add(TimeRange(startTimestamp, endTimestamp));
OPTICK_STORAGE_EVENT(GPUThreadStorage.EventStorage, Description, startTimestamp, endTimestamp);
}

OPTICK_STORAGE_PUSH(GPUThreadStorage.EventStorage, Optick::GetFrameDescription(Optick::FrameType::GPU), startTimestamp)
}
else
{
OPTICK_STORAGE_EVENT(GPUThreadStorage.EventStorage, Description, startTimestamp, endTimestamp);
lastTimeStamp = FMath::Max<uint64>(lastTimeStamp, endTimestamp);
}
GPUThreadStorage.LastTimestamp = FMath::Max<uint64>(GPUThreadStorage.LastTimestamp, endTimestamp);
}

OPTICK_STORAGE_POP(GPUThreadStorage.EventStorage, lastTimeStamp);
GPUThreadStorage.LastTimestamp = lastTimeStamp;
}
}
}
#endif

uint64 FOptickPlugin::ConvertGPUTimestamp(uint64 timestamp)
uint64 FOptickPlugin::ConvertGPUTimestamp(uint64 timestamp, int GPUIndex)
{
if (CalibrationTimestamp.CPUMicroseconds == 0 || CalibrationTimestamp.GPUMicroseconds == 0)
if (CalibrationTimestamps[GPUIndex].CPUMicroseconds == 0 || CalibrationTimestamps[GPUIndex].GPUMicroseconds == 0)
{
CalibrationTimestamp.CPUMicroseconds = uint64(FPlatformTime::ToSeconds64(FPlatformTime::Cycles64()) * 1e6);
CalibrationTimestamp.GPUMicroseconds = timestamp;
return (uint64)-1;
}

const uint64 cpuTimestampUs = timestamp - CalibrationTimestamp.GPUMicroseconds + CalibrationTimestamp.CPUMicroseconds;
const uint64 cpuTimestampUs = timestamp - CalibrationTimestamps[GPUIndex].GPUMicroseconds + CalibrationTimestamps[GPUIndex].CPUMicroseconds;
const uint64 cpuTimestamp = cpuTimestampUs * 1e-6 / FPlatformTime::GetSecondsPerCycle64();
return cpuTimestamp;
}
Expand Down Expand Up @@ -639,13 +634,22 @@ void FOptickPlugin::GetDataFromStatsThread(int64 CurrentFrame)
const FName groupName = Item.NameAndInfo.GetGroupName();

uint32 color = 0;
uint32 filter = 0;

if (NAME_STATGROUP_CPUStalls == groupName)
color = Optick::Color::White;
{
color = Optick::Color::Tomato;
filter = Optick::Filter::Wait;
}

for (int i = 0; i < sizeof(NAME_Wait) / sizeof(NAME_Wait[0]); ++i)
{
if (NAME_Wait[i] == shortName)
{
color = Optick::Color::White;
filter = Optick::Filter::Wait;
}
}

Description = Optick::EventDescription::CreateShared(TCHAR_TO_ANSI(*shortName.ToString()), nullptr, 0, color);

Expand Down
Loading

0 comments on commit c2ec292

Please sign in to comment.