Skip to content

Commit 4f83680

Browse files
authored
Fixes cl_gpu_metrics sample to pass test (#19)
* Fixes cl_gpu_metrics sample to pass test
  Signed-off-by: gta <julia.fedorova@intel.com>
* Removes unnecessary empty lines
  Signed-off-by: gta <julia.fedorova@intel.com>
---------
Signed-off-by: gta <julia.fedorova@intel.com>
1 parent 98c0908 commit 4f83680

File tree

5 files changed

+122
-16
lines changed

5 files changed

+122
-16
lines changed

samples/cl_gpu_metrics/cl_metric_collector.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,12 @@ class ClMetricCollector {
3535
uint32_t sub_device_id =
3636
sub_device_string.empty() ? 0 : std::stoul(sub_device_string);
3737

38+
std::string order_string = utils::GetEnv("PTI_DEVICE_PCI_ORDER");
39+
bool respect_device_pci_order =
40+
order_string.empty() ? false : true;
41+
3842
MetricDevice* metric_device =
39-
MetricDevice::Create(device_id, sub_device_id);
43+
MetricDevice::Create(device_id, sub_device_id, respect_device_pci_order);
4044
if (metric_device == nullptr) {
4145
std::cerr << "[WARNING] Unable to find MD library" << std::endl;
4246
return nullptr;
@@ -150,6 +154,8 @@ class ClMetricCollector {
150154
return calculated_reports;
151155
}
152156

157+
// Returns the device_uuid_ pointer held by this collector (read-only view).
// device_uuid_ is declared with a nullptr default, so callers should check
// the result before dereferencing it.
const utils::DeviceUUID* GetDeviceUUID () { return device_uuid_; };
158+
153159
ClMetricCollector(const ClMetricCollector& copy) = delete;
154160
ClMetricCollector& operator=(const ClMetricCollector& copy) = delete;
155161

@@ -161,6 +167,7 @@ class ClMetricCollector {
161167
PTI_ASSERT(device_ != nullptr);
162168
PTI_ASSERT(group_ != nullptr);
163169
PTI_ASSERT(set_ != nullptr);
170+
device_uuid_ = const_cast<utils::DeviceUUID*>(device->GetDeviceUUID());
164171
EnableMetrics();
165172
}
166173

@@ -256,6 +263,7 @@ class ClMetricCollector {
256263
std::thread* collector_thread_ = nullptr;
257264

258265
std::vector<uint8_t> metric_storage_;
266+
utils::DeviceUUID* device_uuid_ = nullptr;
259267
};
260268

261-
#endif // PTI_SAMPLES_CL_GPU_METRICS_CL_METRIC_COLLECTOR_H_
269+
#endif // PTI_SAMPLES_CL_GPU_METRICS_CL_METRIC_COLLECTOR_H_

samples/cl_gpu_metrics/tool.cc

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,31 @@ static std::chrono::steady_clock::time_point start;
4949
// Prints the tool's command-line help to std::cout: the invocation synopsis,
// the "--pci-order" option, and a hint about PTI_DEVICE_ID / PTI_SUB_DEVICE_ID
// for selecting a specific device/sub-device.
extern "C" PTI_EXPORT
5050
void Usage() {
5151
std::cout <<
52-
"Usage: ./cl_gpu_metrics[.exe] <application> <args>" <<
52+
"Usage: ./cl_gpu_metrics[.exe] [options] <application> <args>" <<
5353
std::endl;
54+
std::cout << "Options:" << std::endl;
55+
std::cout <<
56+
"--pci-order " <<
57+
"Enumerate devices in the order of their PCI addresses" <<
58+
std::endl;
59+
std::cout <<
60+
"Set PTI_DEVICE_ID and PTI_SUB_DEVICE_ID to collect for specific device/sub-device" <<
61+
std::endl;
62+
5463
}
5564

5665
// Consumes the tool's own leading options from argv.
//
// Scans argv starting at index 1 and stops at the first argument that is not
// a recognized option. The only recognized option is "--pci-order", which
// sets the PTI_DEVICE_PCI_ORDER environment variable to "1" via utils::SetEnv.
//
// Returns 1 plus the number of leading options consumed, i.e. the index of
// the first non-option argument (presumably the target application, matching
// the "[options] <application> <args>" synopsis — confirm against the caller).
extern "C" PTI_EXPORT
5766
int ParseArgs(int argc, char* argv[]) {
58-
return 1;
67+
int app_index = 1;
68+
for (int i = 1; i < argc; ++i) {
69+
if (strcmp(argv[i], "--pci-order") == 0 ){
70+
utils::SetEnv("PTI_DEVICE_PCI_ORDER", "1");
71+
// Each consumed option shifts the first non-option index by one.
++app_index;
72+
} else {
73+
// Options are only accepted before the first non-option argument.
break;
74+
}
75+
}
76+
return app_index;
5977
}
6078

6179
extern "C" PTI_EXPORT
@@ -83,9 +101,9 @@ static KernelMap GetKernelMap() {
83101

84102
int gpu_timestamp_id = metric_collector->GetMetricId("QueryBeginTime");
85103
PTI_ASSERT(gpu_timestamp_id >= 0);
86-
int eu_active_id = metric_collector->GetMetricId("EuActive");
104+
int eu_active_id = metric_collector->GetMetricId("XVE_ACTIVE");
87105
PTI_ASSERT(eu_active_id >= 0);
88-
int eu_stall_id = metric_collector->GetMetricId("EuStall");
106+
int eu_stall_id = metric_collector->GetMetricId("XVE_STALL");
89107
PTI_ASSERT(eu_stall_id >= 0);
90108

91109
uint32_t report_size = metric_collector->GetReportSize();
@@ -171,7 +189,23 @@ static void PrintResults() {
171189
}
172190

173191
std::cerr << std::endl;
174-
std::cerr << "=== Device Metrics: ===" << std::endl;
192+
const utils::DeviceUUID* device_uuid = metric_collector->GetDeviceUUID();
193+
if (device_uuid == nullptr) {
194+
std::cerr << "=== Device Metrics: ===" << std::endl;
195+
} else {
196+
std::cerr << "=== Device ( bdf:" << std::hex
197+
<< (uint32_t)device_uuid->pciBus
198+
<< ":" << (uint32_t)device_uuid->pciDevice
199+
<< "." << (uint32_t)device_uuid->pciFunction
200+
<< std::dec;
201+
if (device_uuid->subDeviceId == 0) {
202+
std::cerr << " root device";
203+
} else{
204+
std::cerr << " sub-device: " << (uint32_t)(device_uuid->subDeviceId - 1);
205+
}
206+
std::cerr << ") Metrics: ===" << std::dec << std::endl;
207+
}
208+
175209
std::cerr << std::endl;
176210
std::cerr << "Total Execution Time (ns): " << time.count() << std::endl;
177211
std::cerr << "Total Kernel Time (ns): " << total_duration << std::endl;
@@ -251,4 +285,4 @@ void DisableProfiling() {
251285
delete kernel_collector;
252286
delete metric_collector;
253287
}
254-
}
288+
}

tests/samples/cl_gpu_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def run(path):
5959
app_folder = utils.get_sample_executable_path("cl_gemm")
6060
app_file = os.path.join(app_folder, "cl_gemm" + file_extention)
6161
command = [file_name_prefix + "cl_gpu_metrics" + file_extention,\
62-
app_file, "gpu", "1024", "1"]
62+
"--pci-order", app_file, "gpu", "1024", "1"]
6363
stdout, stderr = utils.run_process(command, path)
6464
if not stdout:
6565
return "stdout is empty"

utils/metric_device.h

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define PTI_UTILS_METRIC_DEVICE_H_
99

1010
#include <string.h>
11+
#include <map>
1112

1213
#include "metric_utils.h"
1314
#include "pti_assert.h"
@@ -30,7 +31,7 @@ class MetricDevice {
3031

3132
md::IAdapterGroupLatest* adapter_group = nullptr;
3233
md::TCompletionCode status = OpenAdapterGroup(&adapter_group);
33-
PTI_ASSERT(status == md::CC_OK);
34+
PTI_ASSERT(status == md::CC_OK || status == md::CC_ALREADY_INITIALIZED);
3435
PTI_ASSERT(adapter_group != nullptr);
3536

3637
uint32_t device_count = adapter_group->GetParams()->AdapterCount;
@@ -64,7 +65,9 @@ class MetricDevice {
6465
return sub_device_count;
6566
}
6667

67-
static MetricDevice* Create(uint32_t device_id, uint32_t sub_device_id) {
68+
static MetricDevice* Create(uint32_t device_id,
69+
uint32_t sub_device_id,
70+
bool respect_device_pci_order = false ) {
6871
SharedLibrary* lib = OpenMetricsLibrary();
6972
if (lib == nullptr) {
7073
return nullptr;
@@ -87,8 +90,32 @@ class MetricDevice {
8790
return nullptr;
8891
}
8992

90-
PTI_ASSERT(device_id < adapter_group->GetParams()->AdapterCount);
91-
md::IAdapterLatest* adapter = adapter_group->GetAdapter(device_id);
93+
uint32_t adapter_count = adapter_group->GetParams()->AdapterCount;
94+
PTI_ASSERT(device_id < adapter_count);
95+
md::IAdapterLatest* adapter = nullptr;
96+
if (respect_device_pci_order) {
97+
// needed to order adapters by PCI address
98+
std::map<md::SAdapterParams_1_9,
99+
md::IAdapter_1_11*,
100+
utils::ComparatorPciAddress<md::SAdapterParams_1_9> > adapters_map;
101+
for (uint32_t i = 0; i < adapter_count; i++){
102+
md::IAdapter_1_11* ad = adapter_group->GetAdapter(i);
103+
PTI_ASSERT(ad != nullptr);
104+
const md::SAdapterParams_1_9* params = ad->GetParams();
105+
adapters_map.insert(std::pair<md::SAdapterParams_1_9,md::IAdapter_1_11*>{*params, ad});
106+
}
107+
uint32_t id = 0;
108+
for (const auto& entry : adapters_map ) {
109+
if (id == device_id) {
110+
adapter = entry.second;
111+
break;
112+
}
113+
id++;
114+
}
115+
116+
} else {
117+
adapter = adapter_group->GetAdapter(device_id);
118+
}
92119
PTI_ASSERT(adapter != nullptr);
93120

94121
uint32_t sub_device_count = adapter->GetParams()->SubDevicesCount;
@@ -100,7 +127,8 @@ class MetricDevice {
100127
}
101128
PTI_ASSERT(status == md::CC_OK || status == md::CC_ALREADY_INITIALIZED);
102129

103-
return new MetricDevice(adapter_group, adapter, device, lib);
130+
return new MetricDevice(adapter_group, adapter, device, lib,
131+
(sub_device_count == 0) ? 0 : sub_device_id + 1);
104132
}
105133

106134
~MetricDevice() {
@@ -170,6 +198,9 @@ class MetricDevice {
170198
return nullptr;
171199
}
172200

201+
// Returns the address of this object's device_uuid_ member. The pointer
// refers to storage inside this MetricDevice instance, so it should not be
// used after the MetricDevice is destroyed.
const utils::DeviceUUID* GetDeviceUUID() {
202+
return &device_uuid_;
203+
}
173204
private:
174205
static SharedLibrary* OpenMetricsLibrary() {
175206
SharedLibrary* lib = nullptr;
@@ -184,13 +215,21 @@ class MetricDevice {
184215

185216
// Private constructor: stores the adapter group, adapter, metrics device and
// library handles, then fills device_uuid_ from the adapter's parameters.
//
// sub_device_index is written to device_uuid_.subDeviceId. Create() passes 0
// when the adapter reports no sub-devices and sub_device_id + 1 otherwise.
MetricDevice(
186217
md::IAdapterGroupLatest* adapter_group, md::IAdapterLatest* adapter,
187-
md::IMetricsDeviceLatest* device, SharedLibrary* lib)
218+
md::IMetricsDeviceLatest* device, SharedLibrary* lib, uint32_t sub_device_index = 0)
188219
: adapter_group_(adapter_group), adapter_(adapter),
189-
device_(device), lib_(lib) {}
220+
device_(device), lib_(lib) {
221+
// Adapter parameter values are narrowed to the field widths declared in
// utils::DeviceUUID (uint16_t ids, uint8_t PCI bus/device/function).
// NOTE(review): revisionID, pciDomain and reserved are not assigned here and
// device_uuid_ has no initializer at its declaration — confirm that no
// consumer reads those fields.
device_uuid_.vendorID = static_cast<uint16_t>(adapter_->GetParams()->VendorId);
222+
device_uuid_.deviceID = static_cast<uint16_t>(adapter_->GetParams()->DeviceId);
223+
device_uuid_.pciBus = static_cast<uint8_t>(adapter_->GetParams()->BusNumber);
224+
device_uuid_.pciDevice = static_cast<uint8_t>(adapter_->GetParams()->DeviceNumber);
225+
device_uuid_.pciFunction = static_cast<uint8_t>(adapter_->GetParams()->FunctionNumber);
226+
device_uuid_.subDeviceId = static_cast<uint8_t>(sub_device_index);
227+
}
190228

191229
md::IAdapterGroupLatest* adapter_group_ = nullptr;
192230
md::IAdapterLatest* adapter_ = nullptr;
193231
md::IMetricsDeviceLatest* device_ = nullptr;
232+
utils::DeviceUUID device_uuid_ ;
194233
SharedLibrary* lib_ = nullptr;
195234
};
196235

utils/utils.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@
4242

4343
namespace utils {
4444

45+
// Plain aggregate describing a device: vendor/device/revision ids, the PCI
// domain/bus/device/function, four reserved bytes and a sub-device id.
// None of the fields has a default member initializer.
//
// subDeviceId convention used elsewhere in this change: 0 denotes the root
// device, and a non-zero value is the sub-device index plus one
// (MetricDevice::Create stores sub_device_id + 1; the tool prints
// subDeviceId - 1).
struct DeviceUUID {
46+
uint16_t vendorID;
47+
uint16_t deviceID;
48+
uint16_t revisionID;
49+
uint16_t pciDomain;
50+
uint8_t pciBus;
51+
uint8_t pciDevice;
52+
uint8_t pciFunction;
53+
uint8_t reserved[4];
54+
// 0 = root device; otherwise sub-device index + 1 (see note above).
uint8_t subDeviceId;
55+
};
56+
4557
struct Comparator {
4658
template<typename T>
4759
bool operator()(const T& left, const T& right) const {
@@ -52,6 +64,19 @@ struct Comparator {
5264
}
5365
};
5466

67+
// Less-than comparator for any type T exposing BusNumber, DeviceNumber and
// FunctionNumber members. Compares lexicographically: bus first, then device,
// then function. MetricDevice::Create uses it as the key ordering of a
// std::map of adapter parameters.
// NOTE(review): no PCI domain/segment field takes part in the comparison, so
// two keys that differ only in domain compare as equivalent — confirm this is
// acceptable for the targeted systems.
template<typename T>
68+
struct ComparatorPciAddress {
69+
bool operator()(const T& left, const T& right) const {
70+
if (left.BusNumber != right.BusNumber) {
71+
return (left.BusNumber < right.BusNumber);
72+
}
73+
if (left.DeviceNumber != right.DeviceNumber) {
74+
return (left.DeviceNumber < right.DeviceNumber);
75+
}
76+
// Bus and device are equal here; the function number decides the order.
return left.FunctionNumber < right.FunctionNumber;
77+
}
78+
};
79+
5580
#if defined(__gnu_linux__)
5681

5782
inline uint64_t GetTime(clockid_t id) {

0 commit comments

Comments
 (0)