-
Notifications
You must be signed in to change notification settings - Fork 46
/
gpa_split_counters_max_per_pass.h
249 lines (213 loc) · 14.3 KB
/
gpa_split_counters_max_per_pass.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
//==============================================================================
// Copyright (c) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
/// @author AMD Developer Tools Team
/// @file
/// @brief Counter splitter that puts max number of hardware counter per pass
/// not used in production, but can be useful for testing.
//==============================================================================
#ifndef GPU_PERF_API_COUNTER_GENERATOR_COMMON_GPA_SPLIT_COUNTERS_MAX_PER_PASS_H_
#define GPU_PERF_API_COUNTER_GENERATOR_COMMON_GPA_SPLIT_COUNTERS_MAX_PER_PASS_H_
#include <vector>
#include "gpu_perf_api_counter_generator/gpa_counter.h"
#include "gpu_perf_api_counter_generator/gpa_split_counters_interfaces.h"
/// @brief Splits counters such that as many internal counters will be scheduled per pass as possible.
class GpaSplitCountersMaxPerPass : public IGpaSplitCounters
{
public:
/// Initializes a new instance of the GPASplitCountersMaxPerPass class.
///
/// @param [in] timestamp_block_ids Set of timestamp block id's.
/// @param [in] time_counter_indices Set of timestamp counter indices.
/// @param [in] max_sq_counters The maximum number of counters that can be simultaneously enabled on the SQ block.
/// @param [in] num_sq_groups The number of SQ counter groups.
/// @param [in] sq_counter_block_info The list of SQ counter groups.
/// @param [in] num_isolated_from_sq_groups The number of counter groups that must be isolated from SQ counter groups.
/// @param [in] isolated_from_sq_groups The list of counter groups that must be isolated from SQ counter groups.
GpaSplitCountersMaxPerPass(const std::set<unsigned int>& timestamp_block_ids,
const std::set<unsigned int>& time_counter_indices,
unsigned int max_sq_counters,
unsigned int num_sq_groups,
GpaSqCounterGroupDesc* sq_counter_block_info,
unsigned int num_isolated_from_sq_groups,
const unsigned int* isolated_from_sq_groups)
: IGpaSplitCounters(timestamp_block_ids,
time_counter_indices,
max_sq_counters,
num_sq_groups,
sq_counter_block_info,
num_isolated_from_sq_groups,
isolated_from_sq_groups){};
/// @brief Virtual destructor.
virtual ~GpaSplitCountersMaxPerPass(){};
/// @brief Puts as many counters in the first pass as will fit based on the maxCountersPerGroup, will expand to additional passes as required by the hardware.
///
/// @param [in] public_counters_to_split The set of public counters that need to be split into passes.
/// @param [in] internal_counters_to_schedule Additional internal counters that need to be scheduled (used by internal builds).
/// @param [in] counter_group_accessor A class to access the internal counters.
/// @param [in] max_counters_per_group The maximum number of counters that can be enabled in a single pass on each HW block or SW group.
/// @param [out] num_scheduled_counters Indicates the total number of internal counters that were assigned to a pass.
///
/// @return The list of passes that the counters are separated into.
std::list<GpaCounterPass> SplitCounters(const std::vector<const GpaDerivedCounterInfoClass*>& public_counters_to_split,
const std::vector<GpaHardwareCounterIndices> internal_counters_to_schedule,
IGpaCounterGroupAccessor* accessor,
const std::vector<unsigned int>& max_counters_per_group,
unsigned int& num_scheduled_counters)
{
// This will eventually be the return value.
std::list<GpaCounterPass> pass_partitions;
// Make sure there are counters to schedule.
if (public_counters_to_split.size() == 0 && internal_counters_to_schedule.size() == 0)
{
return pass_partitions;
}
// Temporary variable to hold the number of counters assigned to each block during each of the passes.
std::list<PerPassData> num_used_counters_per_pass_per_block;
// Add initial pass information
AddNewPassInfo(1, &pass_partitions, &num_used_counters_per_pass_per_block);
// Will store the locations for each scheduled internal counter to help ensure that we don't duplicate any scheduling.
std::map<unsigned int, GpaCounterResultLocation> internal_counter_result_locations;
// Iterate through each public counter.
for (std::vector<const GpaDerivedCounterInfoClass*>::const_iterator public_iter = public_counters_to_split.begin();
public_iter != public_counters_to_split.end();
++public_iter)
{
// Iterate through the internal counters and put them into the appropriate pass (first available pass).
for (std::vector<GpaUInt32>::const_iterator counter_iter = (*public_iter)->internal_counters_required_.begin();
counter_iter != (*public_iter)->internal_counters_required_.end();
++counter_iter)
{
// See if the internal counter has already been scheduled.
std::map<unsigned int, GpaCounterResultLocation>::iterator found_counter = internal_counter_result_locations.find(*counter_iter);
if (found_counter != internal_counter_result_locations.end())
{
// Counter has been scheduled.
// Use the same result location for this instance of the counter.
AddCounterResultLocation((*public_iter)->counter_index_, *counter_iter, found_counter->second.pass_index_, found_counter->second.offset_);
}
else
{
// Counter has not been scheduled.
// So need to find a pass to put it in.
unsigned int pass_index = 0;
std::list<PerPassData>::iterator counters_used_iter = num_used_counters_per_pass_per_block.begin();
std::list<GpaCounterPass>::iterator counter_pass_iter = pass_partitions.begin();
bool done_allocating_counter = false;
while (done_allocating_counter == false)
{
// Make sure there is a partition for current pass.
AddNewPassInfo(pass_index + 1, &pass_partitions, &num_used_counters_per_pass_per_block);
// Increment the pass iterator if past the first loop.
if (pass_index > 0)
{
++counters_used_iter;
++counter_pass_iter;
}
accessor->SetCounterIndex(*counter_iter);
unsigned int group_index = accessor->GroupIndex();
// Try to add the counter to the current pass.
if (CheckForTimestampCounters(accessor, *counter_pass_iter) && CanCounterBeAdded(accessor, *counters_used_iter, max_counters_per_group) &&
CheckForSQCounters(accessor, *counters_used_iter, max_sq_counters_) && CheckCountersAreCompatible(accessor, *counters_used_iter) &&
counter_pass_iter->pass_counter_list.size() < 300)
{
counter_pass_iter->pass_counter_list.push_back(*counter_iter);
counters_used_iter->num_used_counters_per_block[group_index].push_back(accessor->CounterIndex());
done_allocating_counter = true;
// Record where the internal counter was scheduled.
GpaCounterResultLocation location;
location.pass_index_ = (GpaUInt16)pass_index;
location.offset_ = (GpaUInt16)counter_pass_iter->pass_counter_list.size() - 1;
internal_counter_result_locations[*counter_iter] = location;
// Record where to get the result from
AddCounterResultLocation((*public_iter)->counter_index_, *counter_iter, location.pass_index_, location.offset_);
}
else
{
// The counters needs to go into the next pass. Increment the pass index and let the loop happen again.
++pass_index;
}
}
}
}
}
// Set the number of scheduled internal counters.
num_scheduled_counters = (unsigned int)internal_counter_result_locations.size();
InsertHardwareCounters(pass_partitions, internal_counters_to_schedule, accessor, num_used_counters_per_pass_per_block, max_counters_per_group, num_scheduled_counters);
return pass_partitions;
}
private:
/// @brief Inserts each hardware counter into the earliest possible pass.
///
/// @param [in,out] pass_partitions The pass partitions that are generated.
/// @param [in] internal_counters The internal counters that need to be scheduled.
/// @param [in] accessor A interface that accesses the internal counters.
/// @param [in] num_used_counters_per_pass_per_block A list of passes, each consisting of the number of counters scheduled on each block.
/// @param [in] max_counters_per_group A vector containing the maximum number of simultaneous counters for each block.
/// @param [in,out] num_scheduled_counters The total number of internal counters that were scheduled.
void InsertHardwareCounters(std::list<GpaCounterPass>& pass_partitions,
const std::vector<GpaHardwareCounterIndices> internal_counters,
IGpaCounterGroupAccessor* accessor,
std::list<PerPassData> num_used_counters_per_pass_per_block,
const std::vector<unsigned int>& max_counters_per_group,
unsigned int& num_scheduled_counters)
{
// Schedule each of the internal counters.
for (std::vector<GpaHardwareCounterIndices>::const_iterator internal_counter_iter = internal_counters.begin();
internal_counter_iter != internal_counters.end();
++internal_counter_iter)
{
// If the counter is already scheduled in any pass, there is no reason to add it again.
bool counter_already_scheduled = false;
unsigned int pass_index = 0;
for (std::list<GpaCounterPass>::iterator pass_iter = pass_partitions.begin(); pass_iter != pass_partitions.end(); ++pass_iter)
{
int existing_offset = VectorContains<unsigned int>(pass_iter->pass_counter_list, internal_counter_iter->hardware_index);
if (existing_offset >= 0)
{
counter_already_scheduled = true;
// Record where to get the result from.
AddCounterResultLocation(internal_counter_iter->public_index, internal_counter_iter->hardware_index, pass_index, existing_offset);
break;
}
pass_index++;
}
if (counter_already_scheduled)
{
// Don't need to schedule this counter,
// continue on to the next internal counter that needs to be scheduled.
continue;
}
// The counter needs to be scheduled,
// make sure there is enough space for the first pass.
AddNewPassInfo(1, &pass_partitions, &num_used_counters_per_pass_per_block);
accessor->SetCounterIndex(internal_counter_iter->hardware_index);
// Iterate through the passes again and find one where the counter can be inserted.
pass_index = 0;
std::list<PerPassData>::iterator counters_used_iter = num_used_counters_per_pass_per_block.begin();
for (std::list<GpaCounterPass>::iterator pass_iter = pass_partitions.begin(); pass_iter != pass_partitions.end(); ++pass_iter)
{
if (CheckForTimestampCounters(accessor, *pass_iter) == true && CanCounterBeAdded(accessor, *counters_used_iter, max_counters_per_group) == true &&
CheckForSQCounters(accessor, *counters_used_iter, max_sq_counters_) == true && CheckCountersAreCompatible(accessor, *counters_used_iter) == true)
{
// The counter can be scheduled here.
pass_iter->pass_counter_list.push_back(internal_counter_iter->hardware_index);
counters_used_iter->num_used_counters_per_block[accessor->GroupIndex()].push_back(accessor->CounterIndex());
num_scheduled_counters += 1;
// Record where the result will be located.
unsigned int offset = (unsigned int)pass_iter->pass_counter_list.size() - 1;
AddCounterResultLocation(internal_counter_iter->public_index, internal_counter_iter->hardware_index, pass_index, offset);
break;
}
else
{
++pass_index;
// Make sure there is enough space for the next pass.
AddNewPassInfo(pass_index + 1, &pass_partitions, &num_used_counters_per_pass_per_block);
// Increment the iterator after any necessary new passes have been allocated.
++counters_used_iter;
}
}
}
}
};
#endif // GPU_PERF_API_COUNTER_GENERATOR_COMMON_GPA_SPLIT_COUNTERS_MAX_PER_PASS_H_