Skip to content

Commit

Permalink
Filter out the GPUs not assigned to a container in showpid
Browse files Browse the repository at this point in the history
The process IDs of other containers are still visible in the sysfs file, so
filter out the GPUs that are not assigned to this container to prevent a crash.

Change-Id: I665912cd09c606804186aff8cba5c24f5e58ded7
  • Loading branch information
bill-shuzhou-liu authored and dmitrii-galantsev committed Mar 6, 2023
1 parent c8aace1 commit e4daa77
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion src/rocm_smi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3277,11 +3277,23 @@ rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
uint32_t i = 0;
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();

// filter out the devices not visible to container
auto& nodes = smi.kfd_node_map();
for (auto nit = gpu_set.begin(); nit != gpu_set.end();) {
uint64_t gpu_id_val = (*nit);
auto kfd_ite = nodes.find(gpu_id_val);
if (kfd_ite == nodes.end()) {
nit = gpu_set.erase(nit);
} else {
nit++;
}
}

if (dv_indices != nullptr) {
for (auto it = gpu_set.begin(); i < *num_devices && it != gpu_set.end();
++it, ++i) {
uint64_t gpu_id_val = (*it);
dv_indices[i] = smi.kfd_node_map()[gpu_id_val]->amdgpu_dev_index();
dv_indices[i] = nodes[gpu_id_val]->amdgpu_dev_index();
}
}

Expand Down

0 comments on commit e4daa77

Please sign in to comment.