Skip to content

Commit

Permalink
performance(debugger): optimize ATT event handling
Browse files Browse the repository at this point in the history
- check the state of threads with attention before reading SR_IDENT
- a memory read is costly; for threads already in the stopped state
there is no need to read SR_IDENT and check the thread state again
- single-stepping a single thread is substantially faster,
by a factor of 6

Related-To: NEO-8183

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
  • Loading branch information
HoppeMateusz authored and Compute-Runtime-Automation committed Jul 17, 2023
1 parent 204e5fd commit 9a53f20
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 4 deletions.
17 changes: 17 additions & 0 deletions level_zero/tools/source/debug/debug_session_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1393,4 +1393,21 @@ ze_result_t DebugSessionImp::waitForCmdReady(EuThread::ThreadId threadId, uint16
return ZE_RESULT_SUCCESS;
}

// Collects the threads from threadsWithAtt that are not currently reported as
// stopped.
//
// threadsWithAtt    - thread ids to examine.
// notStoppedThreads - output vector; every id whose thread object returns false
//                     from isStopped() is appended, in input order. Entries
//                     already present in the vector are left untouched.
void DebugSessionImp::getNotStoppedThreads(const std::vector<EuThread::ThreadId> &threadsWithAtt, std::vector<EuThread::ThreadId> &notStoppedThreads) {
    for (const auto &threadId : threadsWithAtt) {
        // With tile sessions enabled the thread object is looked up in the
        // session selected by the thread's tileIndex; otherwise in this session.
        const bool alreadyStopped = tileSessionsEnabled
                                        ? tileSessions[threadId.tileIndex].first->allThreads[threadId]->isStopped()
                                        : allThreads[threadId]->isStopped();

        if (alreadyStopped) {
            continue;
        }

        notStoppedThreads.push_back(threadId);
    }
}

} // namespace L0
1 change: 1 addition & 0 deletions level_zero/tools/source/debug/debug_session_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct DebugSessionImp : DebugSession {
tileAttachEnabled = false;
}
}
void getNotStoppedThreads(const std::vector<EuThread::ThreadId> &threadsWithAtt, std::vector<EuThread::ThreadId> &notStoppedThreads);

virtual void attachTile() = 0;
virtual void detachTile() = 0;
Expand Down
10 changes: 8 additions & 2 deletions level_zero/tools/source/debug/linux/prelim/debug_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1325,8 +1325,14 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte
}

if (gpuVa != 0 && stateSaveAreaSize != 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);

std::vector<EuThread::ThreadId> newThreads;
getNotStoppedThreads(threadsWithAttention, newThreads);

if (newThreads.size() > 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
}
} else {
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
DEBUG_BREAK_IF(true);
Expand Down
8 changes: 6 additions & 2 deletions level_zero/tools/source/debug/windows/debug_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,12 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU
std::unique_lock<std::mutex> lock(threadStateMutex);

if (gpuVa != 0 && stateSaveAreaSize != 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
std::vector<EuThread::ThreadId> newThreads;
getNotStoppedThreads(threadsWithAttention, newThreads);
if (newThreads.size() > 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
}
} else {
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
DEBUG_BREAK_IF(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1756,6 +1756,33 @@ TEST_F(DebugSessionTest, givenTssMagicCorruptedWhenStateSaveAreIsReadThenHeaderI
EXPECT_TRUE(session->stateSaveAreaHeader.empty());
}

// Verifies that getNotStoppedThreads() filters out a thread that was marked as
// stopped and returns only the thread that is still reported as not stopped.
TEST(DebugSessionTest, givenStoppedThreadWhenGettingNotStoppedThreadsThenOnlyRunningOrUnavailableThreadsAreReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();

    NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
    Mock<L0::DeviceImp> deviceImp(neoDevice, neoDevice->getExecutionEnvironment());

    auto session = std::make_unique<MockDebugSession>(config, &deviceImp);

    // First thread is explicitly stopped, second one is left in its initial state.
    EuThread::ThreadId stoppedThread(0, 0, 0, 0, 0);
    session->allThreads[stoppedThread]->stopThread(1u);
    EuThread::ThreadId runningThread(0, 0, 0, 0, 1);
    EXPECT_FALSE(session->allThreads[runningThread]->isStopped());

    std::vector<EuThread::ThreadId> threadsWithAtt{stoppedThread, runningThread};
    std::vector<EuThread::ThreadId> notStopped;

    session->getNotStoppedThreads(threadsWithAtt, notStopped);

    // Only the thread that was not stopped may be reported.
    ASSERT_EQ(1u, notStopped.size());
    EXPECT_EQ(runningThread, notStopped[0]);
}

using MultiTileDebugSessionTest = Test<MultipleDevicesWithCustomHwInfo>;

TEST_F(MultiTileDebugSessionTest, givenThreadsFromMultipleTilesWhenResumeCalledThenThreadsResumedInAllTiles) {
Expand Down Expand Up @@ -3423,6 +3450,35 @@ TEST_F(MultiTileDebugSessionTest, givenAttachedRootDeviceWhenAttachingToTiletDev
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

// Verifies that getNotStoppedThreads() called on the root session consults the
// per-tile session's thread objects: a thread stopped in tile session 0 is
// filtered out and only the not-stopped thread is returned.
TEST_F(MultiTileDebugSessionTest, givenTileSessionAndStoppedThreadWhenGettingNotStoppedThreadsThenOnlyRunningOrUnavailableThreadsReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;

    L0::Device *device = driverHandle->devices[0];
    auto neoDevice = device->getNEODevice();
    auto deviceImp = static_cast<DeviceImp *>(device);
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(new OsInterfaceWithDebugAttach);

    auto rootSession = new MockDebugSession(config, device, false);
    rootSession->initialize();
    deviceImp->setDebugSession(rootSession);

    // Thread state is manipulated on the tile 0 session, not on the root session.
    auto tile0Session = static_cast<MockDebugSession *>(rootSession->tileSessions[0].first);

    EuThread::ThreadId stoppedThread(0, 0, 0, 0, 0);
    tile0Session->allThreads[stoppedThread]->stopThread(1u);
    EuThread::ThreadId runningThread(0, 0, 0, 0, 1);
    EXPECT_FALSE(tile0Session->allThreads[runningThread]->isStopped());

    std::vector<EuThread::ThreadId> threadsWithAtt{stoppedThread, runningThread};
    std::vector<EuThread::ThreadId> notStopped;

    rootSession->getNotStoppedThreads(threadsWithAtt, notStopped);

    ASSERT_EQ(1u, notStopped.size());
    EXPECT_EQ(runningThread, notStopped[0]);
}

struct AffinityMaskForSingleSubDevice : MultipleDevicesWithCustomHwInfo {
void setUp() {
DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6527,6 +6527,70 @@ TEST_F(DebugApiLinuxAttentionTest, GivenNoStateSaveAreaOrInvalidSizeWhenHandling
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
}

// Verifies that an EU attention event which reports only threads that are
// already marked as stopped does not trigger a read through the mock ioctl
// handler (preadCalled stays false), does not read the system routine ident,
// and adds no thread to newlyStoppedThreads.
TEST_F(DebugApiLinuxAttentionTest, GivenAlreadyStoppedThreadsWhenHandlingAttEventThenStateSaveAreaIsNotRead) {
zet_debug_config_t config = {};
config.pid = 0x1234;

auto sessionMock = std::make_unique<MockDebugSessionLinux>(config, device, 10);
ASSERT_NE(nullptr, sessionMock);
sessionMock->clientHandle = MockDebugSessionLinux::mockClientHandle;
auto handler = new MockIoctlHandler;
sessionMock->ioctlHandler.reset(handler);
// Back pread with the state save area header at address 0x1000, so any read
// of that area would go through the handler and be observable below.
SIP::version version = {2, 0, 0};
initStateSaveArea(sessionMock->stateSaveAreaHeader, version, device);
handler->setPreadMemory(sessionMock->stateSaveAreaHeader.data(), sessionMock->stateSaveAreaHeader.size(), 0x1000);

uint64_t ctxHandle = 2;
uint64_t vmHandle = 7;
uint64_t lrcHandle = 8;

// Wire lrcHandle -> ctxHandle -> vmHandle -> state save area bind info
// (address 0x1000, size of the header) on the mock client connection.
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->contextsCreated[ctxHandle].vm = vmHandle;
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->lrcToContextHandle[lrcHandle] = ctxHandle;
DebugSessionLinux::BindInfo cssaInfo = {0x1000, sessionMock->stateSaveAreaHeader.size()};
sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->vmToContextStateSaveAreaBindInfo[vmHandle] = cssaInfo;

// Event buffer: the attention event struct followed by up to 128 bytes of bitmask.
uint8_t data[sizeof(prelim_drm_i915_debug_event_eu_attention) + 128];
auto &hwInfo = neoDevice->getHardwareInfo();
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
std::unique_ptr<uint8_t[]> bitmask;
size_t bitmaskSize = 0;

std::vector<EuThread::ThreadId> threads{
{0, 0, 0, 0, 0}, {0, 0, 0, 0, 2}};

// bitmask returned in ATT event - 2 threads
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, bitmaskSize);
auto threadsWithAtt = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), bitmaskSize);

// Precondition under test: every thread decoded from the bitmask is already
// recorded as stopped before the event is handled.
for (const auto &thread : threadsWithAtt) {
sessionMock->stoppedThreads[thread.packed] = 1;
sessionMock->allThreads[thread]->stopThread(vmHandle);
}

prelim_drm_i915_debug_event_eu_attention attention = {};
attention.base.type = PRELIM_DRM_I915_DEBUG_EVENT_EU_ATTENTION;
attention.base.flags = PRELIM_DRM_I915_DEBUG_EVENT_STATE_CHANGE;
// Bitmask size is clamped to the 128 bytes reserved in the data buffer.
attention.base.size = sizeof(prelim_drm_i915_debug_event_eu_attention) + std::min(uint32_t(128), static_cast<uint32_t>(bitmaskSize));
attention.base.seqno = 2;
attention.client_handle = MockDebugSessionLinux::mockClientHandle;
attention.lrc_handle = lrcHandle;
attention.flags = 0;
attention.ci.engine_class = 0;
attention.ci.engine_instance = 0;
attention.bitmask_size = std::min(uint32_t(128), static_cast<uint32_t>(bitmaskSize));

// Serialize the event header, then the bitmask at the offset of the bitmask member.
memcpy(data, &attention, sizeof(prelim_drm_i915_debug_event_eu_attention));
memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_eu_attention, bitmask)), bitmask.get(), std::min(size_t(128), bitmaskSize));

sessionMock->handleEvent(reinterpret_cast<prelim_drm_i915_debug_event *>(data));

// No thread may be reported as newly stopped.
EXPECT_EQ(0u, sessionMock->addThreadToNewlyStoppedFromRaisedAttentionCallCount);
EXPECT_EQ(0u, sessionMock->newlyStoppedThreads.size());

// No memory read and no SR ident read may have happened.
EXPECT_FALSE(handler->preadCalled);
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
}

using DebugApiLinuxAsyncThreadTest = Test<DebugApiLinuxFixture>;

TEST_F(DebugApiLinuxAsyncThreadTest, GivenPollReturnsErrorAndEinvalWhenReadingInternalEventsAsyncThenDetachEventIsGenerated) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ struct MockDebugSession : public L0::DebugSessionImp {
auto subDevice = connectedDevice->getNEODevice()->getSubDevice(i)->getSpecializedDevice<Device>();
tileSessions[i] = std::pair<DebugSessionImp *, bool>{new MockDebugSession(config, subDevice), false};
}
tileSessionsEnabled = true;
}

return ZE_RESULT_SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,56 @@ TEST_F(DebugApiWindowsAttentionTest, GivenEuAttentionEventForThreadsWhenHandling
EXPECT_EQ(expectedThreads, sessionMock->newlyStoppedThreads.size());
}

// Verifies that an EU attention event which reports only threads that are
// already marked as stopped is handled successfully without issuing a
// DBGUMD_ACTION_READ_GFX_MEMORY escape and without reading the system routine
// ident.
TEST_F(DebugApiWindowsAttentionTest, GivenAlreadyStoppedThreadsWhenHandlingAttEventThenStateSaveAreaIsNotRead) {
zet_debug_config_t config = {};
config.pid = 0x1234;

std::unique_ptr<uint8_t[]> bitmask;
size_t bitmaskSize = 0;
auto &hwInfo = neoDevice->getHardwareInfo();
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();

std::vector<EuThread::ThreadId> threads{
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 1},
};

auto sessionMock = std::make_unique<MockDebugSessionWindows>(config, device);
// NOTE(review): stoppedThreads is set again further down for the threads decoded
// from the bitmask; this first loop may be redundant — confirm.
for (auto thread : threads) {
sessionMock->stoppedThreads[thread.packed] = 1;
}
sessionMock->allContexts.insert(0x12345);

// Publish a captured state save area whose VA and size point at the header
// buffer, and let the mock WDDM serve reads from that same buffer.
SIP::version version = {2, 0, 0};
initStateSaveArea(sessionMock->stateSaveAreaHeader, version, device);
sessionMock->stateSaveAreaCaptured = true;
sessionMock->stateSaveAreaVA.store(reinterpret_cast<uint64_t>(sessionMock->stateSaveAreaHeader.data()));
sessionMock->stateSaveAreaSize.store(sessionMock->stateSaveAreaHeader.size());

mockWddm->srcReadBuffer = sessionMock->stateSaveAreaHeader.data();
mockWddm->srcReadBufferBaseAddress = sessionMock->stateSaveAreaVA.load();

// Build the attention bitmask for the two threads and decode it back to thread ids.
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, bitmaskSize);
auto threadsWithAtt = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), bitmaskSize);

// Precondition under test: every thread decoded from the bitmask is already
// recorded as stopped before the event is handled.
for (const auto &thread : threadsWithAtt) {
sessionMock->stoppedThreads[thread.packed] = 1;
sessionMock->allThreads[thread]->stopThread(0);
}

// Queue a single attention-bit-set event carrying the bitmask.
mockWddm->numEvents = 1;
mockWddm->eventQueue[0].readEventType = DBGUMD_READ_EVENT_EU_ATTN_BIT_SET;
copyBitmaskToEventParams(&mockWddm->eventQueue[0].eventParamsBuffer.eventParamsBuffer, bitmask, bitmaskSize);
sessionMock->wddm = mockWddm;
sessionMock->debugHandle = MockDebugSessionWindows::mockDebugHandle;

auto result = sessionMock->readAndHandleEvent(100);

// The event is handled, but no GFX memory read escape and no SR ident read occurred.
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockWddm->dbgUmdEscapeActionCalled[DBGUMD_ACTION_READ_GFX_MEMORY]);
EXPECT_EQ(0u, sessionMock->readSystemRoutineIdentFromMemoryCallCount);
}

TEST_F(DebugApiWindowsAttentionTest, GivenNoContextWhenHandlingAttentionEventThenErrorIsReturned) {
zet_debug_config_t config = {};
config.pid = 0x1234;
Expand Down

0 comments on commit 9a53f20

Please sign in to comment.