Skip to content

Commit

Permalink
Sy/slurm sacct (#19117)
Browse files Browse the repository at this point in the history
* fix small bug

* changelog
  • Loading branch information
steveny91 authored Nov 22, 2024
1 parent d1b738d commit f81d344
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
1 change: 1 addition & 0 deletions slurm/changelog.d/19117.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix issue in which the sacct params kept growing with each iteration
8 changes: 5 additions & 3 deletions slurm/datadog_checks/slurm/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,18 +316,20 @@ def process_sdiag(self, output):
self.gauge('sdiag.enabled', 1)

def _update_sacct_params(self):
    """Rebuild ``self.sacct_cmd`` and record the time of this update.

    When ``self.last_run_time`` is ``None`` the command is built from an
    unmodified copy of ``SACCT_PARAMS``. Otherwise a single
    ``--starttime=now-<N>seconds`` argument is added, where ``N`` is the
    truncated difference between the current timestamp and
    ``self.last_run_time`` (the difference is formatted as seconds).
    ``self.last_run_time`` is refreshed on every call.
    """
    # Work on a copy: appending to the shared module-level SACCT_PARAMS list
    # made the argument list gain one more --starttime entry on every call.
    sacct_params = SACCT_PARAMS.copy()

    if self.last_run_time is not None:
        delta = get_timestamp() - self.last_run_time
        start_time_param = f"--starttime=now-{int(delta)}seconds"

        # Drop any --starttime already present so exactly one is passed on.
        sacct_params = [param for param in sacct_params if not param.startswith('--starttime')]
        sacct_params.append(start_time_param)
        # Logged inside the branch because start_time_param is only bound here;
        # logging it unconditionally would raise UnboundLocalError on the
        # first call.
        self.log.debug("Updating sacct command with new timestamp: %s", start_time_param)

    self.last_run_time = get_timestamp()

    # Build the sacct command from the per-call parameter list.
    self.sacct_cmd = self.get_slurm_command('sacct', sacct_params)

def _process_sinfo_cpu_state(self, cpus_state, namespace, tags):
# "0/2/0/2"
Expand Down
19 changes: 19 additions & 0 deletions slurm/tests/test_unit.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import time
from unittest.mock import patch

import pytest

from datadog_checks.slurm import SlurmCheck
from datadog_checks.slurm.constants import SACCT_PARAMS

from .common import (
DEFAULT_SINFO_PATH,
Expand Down Expand Up @@ -50,6 +52,23 @@ def test_sinfo_command_params(collection_level, gpu_stats, expected_params, inst
assert check.sinfo_partition_cmd == expected_params


def test_acct_command_params(instance):
    """Check the sacct command across three consecutive parameter updates.

    The first update must yield the base command only; each later update,
    made after sleeping ``elapsed`` seconds, must yield the base command plus
    one ``--starttime=now-<elapsed>seconds`` argument.
    """
    # Enable sacct collection on the instance configuration.
    instance['collect_sacct_stats'] = True

    check = SlurmCheck('slurm', {}, [instance])
    base_cmd = ['/usr/bin/sacct'] + SACCT_PARAMS

    for elapsed in (0, 1, 2):
        if elapsed:
            # Let real time pass so the computed delta matches ``elapsed``.
            time.sleep(elapsed)
        check._update_sacct_params()

        expected_cmd = list(base_cmd)
        if elapsed:
            expected_cmd.append(f'--starttime=now-{elapsed}seconds')
        assert check.sacct_cmd == expected_cmd


@pytest.mark.parametrize(
"expected_metrics, binary",
[
Expand Down

0 comments on commit f81d344

Please sign in to comment.