Skip to content

Commit

Permalink
Initial working version on NVIDIA, cleanup needed.
Browse files Browse the repository at this point in the history
  • Loading branch information
tpatki committed Jun 27, 2024
1 parent 18adc98 commit 67c13fa
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 22 deletions.
18 changes: 9 additions & 9 deletions src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,25 +619,25 @@ void nvidia_gpu_get_energy_json(int chipid, json_t *get_energy_obj)
for (d = chipid * (int)m_gpus_per_socket;
d < (chipid + 1) * (int)m_gpus_per_socket; ++d)
{
nvmlDeviceGetPowerUsage(m_unit_devices_file_desc[d], &gpu_power);
value = (double)gpu_power * 0.001f;
nvmlDeviceGetTotalEnergyConsumption(m_unit_devices_file_desc[d], &gpu_energy);
value = (double)gpu_energy * 0.001f;
snprintf(devID, devIDlen, "GPU_%d", d);
json_object_set_new(gpu_obj, devID, json_real(value));
total_gpu_power += value;
total_gpu_energy += value;
}

// If we have an existing CPU object with energy_node_joules, update its value.
// Except on IBM Power9 systems, as they report node power with PWRSYS
// directly. So we don't need to add in the GPU values separately.

#ifndef VARIORUM_WITH_IBM_CPU
if (json_object_get(get_power_obj, "power_node_watts") != NULL)
if (json_object_get(get_energy_obj, "energy_node_joules") != NULL)
{
double power_node;
power_node = json_real_value(json_object_get(get_power_obj,
"power_node_watts"));
json_object_set(get_power_obj, "power_node_watts",
json_real(power_node + total_gpu_power));
double energy_node;
energy_node = json_real_value(json_object_get(get_energy_obj,
"energy_node_joules"));
json_object_set(get_energy_obj, "energy_node_joules",
json_real(energy_node + total_gpu_energy));
}
#endif

Expand Down
31 changes: 18 additions & 13 deletions src/variorum/variorum.c
Original file line number Diff line number Diff line change
Expand Up @@ -1576,8 +1576,8 @@ int variorum_print_energy(void)
{
int err = 0;
int i;
int has_cpu = 0;
int has_gpu = 0;
// int has_cpu = 0;
// int has_gpu = 0;
err = variorum_enter(__FILE__, __FUNCTION__, __LINE__);
if (err)
{
Expand All @@ -1588,7 +1588,7 @@ int variorum_print_energy(void)
// If we have a CPU-only or CPU+GPU multi-platform build, we should print
// the node-level energy.
// First check if we have a CPU platform, then check for a GPU platform

/*
#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
has_cpu = 1;
#endif
Expand All @@ -1599,6 +1599,7 @@ int variorum_print_energy(void)
// CPU-only or multi-platform build
if ((has_cpu && has_gpu) || (has_cpu))
{
*/
for (i = 0; i < P_NUM_PLATFORMS; i++)
{
if (g_platform[i].variorum_print_energy == NULL)
Expand All @@ -1614,7 +1615,7 @@ int variorum_print_energy(void)
return -1;
}
}
}
/* }
else
{
// We have a GPU-only build, currently doesn't support get_energy
Expand All @@ -1623,6 +1624,7 @@ int variorum_print_energy(void)
__FUNCTION__, __LINE__);
return 0;
}
*/
err = variorum_exit(__FILE__, __FUNCTION__, __LINE__);

if (err)
Expand All @@ -1636,8 +1638,8 @@ int variorum_print_verbose_energy(void)
{
int err = 0;
int i;
int has_cpu = 0;
int has_gpu = 0;
// int has_cpu = 0;
// int has_gpu = 0;
err = variorum_enter(__FILE__, __FUNCTION__, __LINE__);
if (err)
{
Expand All @@ -1648,7 +1650,7 @@ int variorum_print_verbose_energy(void)
// If we have a CPU-only or CPU+GPU multi-platform build, we should print
// the node-level energy.
// First check if we have a CPU platform, then check for a GPU platform

/*
#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
has_cpu = 1;
#endif
Expand All @@ -1659,6 +1661,7 @@ int variorum_print_verbose_energy(void)
// CPU-only or multi-platform build
if ((has_cpu && has_gpu) || (has_cpu))
{
*/
for (i = 0; i < P_NUM_PLATFORMS; i++)
{
if (g_platform[i].variorum_print_energy == NULL)
Expand All @@ -1674,15 +1677,15 @@ int variorum_print_verbose_energy(void)
return -1;
}
}
}
/* }
else
{
// We have a GPU-only build, currently doesn't support get_energy
variorum_error_handler("Feature not yet implemented or is not supported",
VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
__FUNCTION__, __LINE__);
return 0;
}
}*/
err = variorum_exit(__FILE__, __FUNCTION__, __LINE__);
if (err)
{
Expand All @@ -1695,8 +1698,8 @@ int variorum_get_energy_json(char **get_energy_obj_str)
{
int err = 0;
int i;
int has_cpu = 0;
int has_gpu = 0;
// int has_cpu = 0;
// int has_gpu = 0;
char hostname[1024];
uint64_t ts;
struct timeval tv;
Expand All @@ -1720,7 +1723,7 @@ int variorum_get_energy_json(char **get_energy_obj_str)
// If we have a CPU-only or CPU+GPU multi-platform build, we should print
// the node-level energy.
// First check if we have a CPU platform, then check for a GPU platform

/*
#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
has_cpu = 1;
#endif
Expand All @@ -1731,6 +1734,7 @@ int variorum_get_energy_json(char **get_energy_obj_str)
// CPU-only or multi-platform build
if ((has_cpu && has_gpu) || (has_cpu))
{
*/
for (i = 0; i < P_NUM_PLATFORMS; i++)
{
if (g_platform[i].variorum_get_energy_json == NULL)
Expand All @@ -1748,7 +1752,7 @@ int variorum_get_energy_json(char **get_energy_obj_str)
}
*get_energy_obj_str = json_dumps(get_energy_obj, JSON_INDENT(4));
}
}
/* }
else
{
// We have a GPU-only build, currently doesn't support get_energy
Expand All @@ -1758,6 +1762,7 @@ int variorum_get_energy_json(char **get_energy_obj_str)
*get_energy_obj_str = json_dumps(get_energy_obj, JSON_INDENT(4));
return 0;
}
*/

json_decref(get_energy_obj);

Expand Down

0 comments on commit 67c13fa

Please sign in to comment.