Skip to content

Commit

Permalink
Fix bugs with incorrect IB packet hierarchy
Browse files Browse the repository at this point in the history
This includes adding support for CP_SET_CTXSWITCH_IB, as
well as sometimes INDIRECT_BUFFER child packets getting
incorrectly added to their parent nodes
  • Loading branch information
Shan-Min Chao committed Nov 7, 2023
1 parent 415f379 commit 0adbc01
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 35 deletions.
20 changes: 9 additions & 11 deletions dive_core/command_hierarchy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,11 @@ bool CommandHierarchyCreator::OnIbStart(uint32_t submit_index,
const IndirectBufferInfo &ib_info,
IbType type)
{
m_cur_ib_level = ib_info.m_ib_level;

// Make all subsequent shared node parent the actual IB-packet
if (m_cur_ib_packet_node_index != UINT64_MAX)
m_cur_shared_node_parent_index = m_cur_ib_packet_node_index;
m_shared_node_ib_parent_stack[m_cur_ib_level] = m_cur_ib_packet_node_index;

// Create IB description string
std::ostringstream ib_string_stream;
Expand Down Expand Up @@ -771,7 +773,7 @@ bool CommandHierarchyCreator::OnIbStart(uint32_t submit_index,
}
}
DIVE_ASSERT(group_index != UINT64_MAX);
m_cur_shared_node_parent_index = group_index;
m_shared_node_ib_parent_stack[m_cur_ib_level] = group_index;
}

if (ib_info.m_skip)
Expand Down Expand Up @@ -811,7 +813,6 @@ bool CommandHierarchyCreator::OnIbStart(uint32_t submit_index,
AddChild(CommandHierarchy::kSubmitTopology, parent_node_index, ib_node_index);

m_ib_stack.push_back(ib_node_index);
m_cur_ib_level = ib_info.m_ib_level;
m_cmd_begin_packet_node_indices.clear();
m_cmd_begin_event_node_indices.clear();
return true;
Expand Down Expand Up @@ -840,10 +841,6 @@ bool CommandHierarchyCreator::OnIbEnd(uint32_t submit_index,
m_cmd_begin_packet_node_indices.clear();
m_cmd_begin_event_node_indices.clear();
m_cur_ib_level = ib_info.m_ib_level;

// Reset back to submit node being the parent (as opposed to, possibly, the indirect_buffer
// packet)
m_cur_shared_node_parent_index = m_cur_submit_node_index;
return true;
}

Expand All @@ -867,7 +864,7 @@ bool CommandHierarchyCreator::OnPacket(const IMemoryManager &mem_manager,
type,
header);

uint64_t parent_index = m_cur_shared_node_parent_index;
uint64_t parent_index = m_shared_node_ib_parent_stack[m_cur_ib_level];
AddSharedChild(CommandHierarchy::kEngineTopology, parent_index, packet_node_index);
AddSharedChild(CommandHierarchy::kSubmitTopology, parent_index, packet_node_index);
AddSharedChild(CommandHierarchy::kAllEventTopology, parent_index, packet_node_index);
Expand Down Expand Up @@ -979,7 +976,8 @@ bool CommandHierarchyCreator::OnPacket(const IMemoryManager &mem_manager,
m_node_parent_info[CommandHierarchy::kRgpTopology][event_node_index] = parent_node_index;
}
else if ((opcode == CP_INDIRECT_BUFFER_PFE || opcode == CP_INDIRECT_BUFFER_PFD ||
opcode == CP_INDIRECT_BUFFER_CHAIN || opcode == CP_COND_INDIRECT_BUFFER_PFE))
opcode == CP_INDIRECT_BUFFER_CHAIN || opcode == CP_COND_INDIRECT_BUFFER_PFE ||
opcode == CP_SET_CTXSWITCH_IB))
{
m_cur_ib_packet_node_index = packet_node_index;
}
Expand Down Expand Up @@ -1077,8 +1075,8 @@ void CommandHierarchyCreator::OnSubmitStart(uint32_t submit_index, const SubmitI
AddChild(CommandHierarchy::kAllEventTopology, Topology::kRootNodeIndex, submit_node_index);
AddChild(CommandHierarchy::kRgpTopology, Topology::kRootNodeIndex, submit_node_index);
m_cur_submit_node_index = submit_node_index;
m_cur_shared_node_parent_index = m_cur_submit_node_index;
m_cur_engine_index = submit_info.GetEngineIndex();
m_cur_ib_level = 1;
m_shared_node_ib_parent_stack[m_cur_ib_level] = m_cur_submit_node_index;
m_cur_ib_packet_node_index = UINT64_MAX;
m_ib_stack.clear();
}
Expand Down
6 changes: 3 additions & 3 deletions dive_core/command_hierarchy.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,10 +560,10 @@ class CommandHierarchyCreator : public IEmulateCallbacks
uint32_t m_num_events; // Number of events so far
Packets m_packets; // Packets added since last event

uint64_t m_cur_shared_node_parent_index; // The current parent of any shared node being added
uint64_t m_cur_submit_node_index; // Current submit node being processed
uint64_t m_cur_ib_packet_node_index; // Current ib packet node being processed
uint64_t m_cur_submit_node_index; // Current submit node being processed
uint64_t m_cur_ib_packet_node_index; // Current ib packet node being processed
uint8_t m_cur_ib_level;
uint64_t m_shared_node_ib_parent_stack[EmulatePM4::kTotalIbLevels];

// Flattening is the process of forcing chain ib nodes to only ever be child to non-chain ib
// nodes, even when daisy chaining across multiple chain ib nodes. This makes the topology
Expand Down
12 changes: 6 additions & 6 deletions dive_core/common/emulate_pm4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,8 +620,8 @@ bool EmulatePM4::ExecuteSubmit(IEmulateCallbacks &callbacks,
// Used to keep track of progress of emulation so far
EmulateState emu_state;
emu_state.m_submit_index = submit_index;
emu_state.m_top_of_stack = EmulateState::kPrimaryRing;
EmulateState::IbStack *primary_ring = &emu_state.m_ib_stack[EmulateState::kPrimaryRing];
emu_state.m_top_of_stack = IbLevel::kPrimaryRing;
EmulateState::IbStack *primary_ring = &emu_state.m_ib_stack[IbLevel::kPrimaryRing];
primary_ring->m_cur_va = UINT64_MAX;
primary_ring->m_ib_queue_index = primary_ring->m_ib_queue_size = 0;

Expand All @@ -648,7 +648,7 @@ bool EmulatePM4::ExecuteSubmit(IEmulateCallbacks &callbacks,

// Should always be emulating something in an IB. If it's parked at the primary ring,
// then that means emulation has completed
while (emu_state.m_top_of_stack != EmulateState::kPrimaryRing)
while (emu_state.m_top_of_stack != IbLevel::kPrimaryRing)
{
// Callbacks + advance
EmulateState::IbStack *cur_ib_level = &emu_state.m_ib_stack[emu_state.m_top_of_stack];
Expand Down Expand Up @@ -896,15 +896,15 @@ bool EmulatePM4::AdvanceToQueuedIB(const IMemoryManager &mem_manager,

if (prev_ib_level->m_ib_queue_type != IbType::kChain)
{
DIVE_ASSERT(emu_state->m_top_of_stack < EmulateState::kTotalIbLevels - 1);
DIVE_ASSERT(emu_state->m_top_of_stack < IbLevel::kTotalIbLevels - 1);

// If it's calling into another IB from the primary ring, then update the ib index. Note
// that only IBs called from primary ring as labelled as "normal" ibs
if (prev_ib_level->m_ib_queue_type == IbType::kNormal)
emu_state->m_ib_index = prev_ib_level->m_ib_queue_index;

// Enter next IB level (CALL)
emu_state->m_top_of_stack = (EmulateState::IbLevel)(emu_state->m_top_of_stack + 1);
emu_state->m_top_of_stack = (IbLevel)(emu_state->m_top_of_stack + 1);

uint32_t index = prev_ib_level->m_ib_queue_index;
EmulateState::IbStack *new_ib_level = emu_state->GetCurIb();
Expand Down Expand Up @@ -998,7 +998,7 @@ bool EmulatePM4::AdvanceOutOfIB(EmulateState *emu_state, IEmulateCallbacks &call
{
EmulateState::IbStack *cur_ib_level = emu_state->GetCurIb();

emu_state->m_top_of_stack = (EmulateState::IbLevel)(emu_state->m_top_of_stack - 1);
emu_state->m_top_of_stack = (IbLevel)(emu_state->m_top_of_stack - 1);

IndirectBufferInfo ib_info;
ib_info.m_va_addr = cur_ib_level->m_cur_va;
Expand Down
32 changes: 17 additions & 15 deletions dive_core/common/emulate_pm4.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,21 @@ class EmulateStateTracker
class EmulatePM4
{
public:
enum IbLevel
{
kPrimaryRing,
kIb1,
kIb2,
kIb3,

// There's no IB4 in the GPU, but it's useful to have one in the emulator in case a
// CP_SET_DRAW_STATE is called from an IB3, since we're emulating those packets as
// CALLs (I'm not sure if that's even possible, but better safe than sorry!)
kIb4,

kTotalIbLevels
};

static const uint32_t kMaxPendingIbs = 100;
EmulatePM4();

Expand Down Expand Up @@ -224,24 +239,11 @@ class EmulatePM4
// Index of top-most IB (i.e. IB1)
uint32_t m_ib_index;

uint32_t m_submit_index;

// Stack to managing the different IB levels
// Top-most element contains the current Program Counter
// If top of stack is at IB0, then that means there's nothing to execute
enum IbLevel
{
kPrimaryRing,
kIb1,
kIb2,
kIb3,

// There's no IB4 in the GPU, but it's useful to have one in the emulator in case a
// CP_SET_DRAW_STATE is called from an IB3, since we're emulating those packets as
// CALLs (I'm not sure if that's even possible, but better safe than sorry!)
kIb4,

kTotalIbLevels
};
uint32_t m_submit_index;
IbStack m_ib_stack[kTotalIbLevels];
IbLevel m_top_of_stack;
IbStack *GetCurIb() { return &m_ib_stack[m_top_of_stack]; }
Expand Down

0 comments on commit 0adbc01

Please sign in to comment.