Skip to content

Commit

Permalink
Improved logging for RAPL permission issues and fixed tests
Browse files: browse the repository at this point in the history
  • Loading branch information
Snailed committed Feb 6, 2025
1 parent 335efe3 commit e537a96
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 27 deletions.
2 changes: 1 addition & 1 deletion carbontracker/components/component.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,7 +106,7 @@ def collect_power_usage(self, epoch: int):
# Only raise error if no measurements have been collected.
if not self.power_usages[-1]:
self.logger.err_critical(
f"Could not read CPU/DRAM energy consumption due to lack of read-permissions.\n\tPlease run the following command(s): \n\t\t{"\n\t\t".join(commands)}"
r"Could not read CPU/DRAM energy consumption due to lack of read-permissions.\n\tPlease run the following command(s): \n\t\t" + r"\n\t\t".join(commands)
)
# Append zero measurement to avoid further errors.
self.power_usages.append([0])
Expand Down
4 changes: 3 additions & 1 deletion carbontracker/exceptions.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
from typing import List

class NoComponentsAvailableError(Exception):
def __init__(
self,
Expand All @@ -23,7 +25,7 @@ def __init__(self, expected_unit, received_unit, message):
class IntelRaplPermissionError(Exception):
"""Raised when an Intel RAPL permission error occurs."""

def __init__(self, file_names: list[str]):
def __init__(self, file_names: List[str]):
self.file_names = file_names


Expand Down
4 changes: 2 additions & 2 deletions tests/components/test_intel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_available(self, mock_listdir, mock_exists):
mock_exists.return_value = True
mock_listdir.return_value = ["some_directory"]

component = Component(name='cpu', pids=[], devices_by_pid={})
component = Component(name='cpu', pids=[], devices_by_pid={}, logger=None)
self.assertTrue(component.available())

@patch("os.path.exists")
Expand All @@ -35,7 +35,7 @@ def test_available_false(self, mock_available, mock_listdir, mock_exists):
mock_exists.return_value = False
mock_listdir.return_value = []

cpu = Component(name='cpu', pids=[], devices_by_pid={})
cpu = Component(name='cpu', pids=[], devices_by_pid={}, logger=None)
self.assertFalse(cpu.available())

@patch("time.sleep")
Expand Down
44 changes: 22 additions & 22 deletions tests/test_component.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,23 +24,23 @@ class TestComponent(unittest.TestCase):
def test_init_valid_component(
self, mock_handlers_by_name, mock_error_by_name, mock_component_names
):
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
self.assertEqual(component.name, "gpu")
self.assertEqual(component._handler, mock_handlers_by_name()[0]())

def test_init_invalid_component(self):
with self.assertRaises(exceptions.ComponentNameError):
Component(name="unknown", pids=[], devices_by_pid=False)
Component(name="unknown", pids=[], devices_by_pid=False, logger=None)

def test_devices(self):
handler_mock = MagicMock(devices=MagicMock(return_value=["Test GPU"]))
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = handler_mock
self.assertEqual(component.devices(), ["Test GPU"])

def test_available_true(self):
handler_mock = MagicMock(available=MagicMock(return_value=True))
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = handler_mock
self.assertTrue(component.available())

Expand All @@ -52,33 +52,33 @@ def test_available_true(self):
return_value=False,
)
def test_available_false(self, mock_apple_gpu_available, mock_nvidia_gpu_available):
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
self.assertFalse(component.available())

def test_collect_power_usage_no_measurement(self):
handler_mock = MagicMock(
power_usage=MagicMock(side_effect=exceptions.IntelRaplPermissionError)
power_usage=MagicMock(side_effect=exceptions.IntelRaplPermissionError(file_names=["file1", "file2"]))
)
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=MagicMock(err_critical=MagicMock()))
component._handler = handler_mock
component.collect_power_usage(epoch=1)
self.assertEqual(component.power_usages, [[], [0]])

def test_collect_power_usage_with_measurement(self):
handler_mock = MagicMock(power_usage=MagicMock(return_value=[1000]))
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
component._handler = handler_mock
component.collect_power_usage(epoch=1)
self.assertEqual(component.power_usages, [[1000]])

def test_collect_power_usage_with_measurement_but_no_epoch(self):
power_collector = Component(name="cpu", pids=[], devices_by_pid=False)
power_collector = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
power_collector._handler = MagicMock(power_usage=MagicMock(return_value=[1000]))
power_collector.collect_power_usage(epoch=0)
assert len(power_collector.power_usages) == 0

def test_collect_power_usage_with_previous_measurement(self):
power_collector = Component(name="cpu", pids=[], devices_by_pid=False)
power_collector = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
power_collector._handler = MagicMock(power_usage=MagicMock(return_value=[1000]))
power_collector.collect_power_usage(epoch=1)
power_collector.collect_power_usage(epoch=3)
Expand All @@ -88,13 +88,13 @@ def test_collect_power_usage_GPUPowerUsageRetrievalError(self):
handler_mock = MagicMock(
power_usage=MagicMock(side_effect=exceptions.GPUPowerUsageRetrievalError)
)
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=MagicMock(err_critical=MagicMock()))
component._handler = handler_mock
component.collect_power_usage(epoch=1)
self.assertEqual(component.power_usages, [[], [0]])

def test_energy_usage(self):
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
component.power_usages = [[1000], [2000], [3000]]
epoch_times = [1, 2, 3]
energy_usages = component.energy_usage(epoch_times)
Expand All @@ -104,14 +104,14 @@ def test_energy_usage(self):
self.assertTrue(np.all(np.array(energy_usages) > 0))

def test_energy_usage_no_measurements(self):
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
component.power_usages = [[]]
epoch_times = [1]
energy_usages = component.energy_usage(epoch_times)
self.assertEqual(energy_usages, [0])

def test_energy_usage_with_power_from_later_epoch(self):
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
component.power_usages = [[1000], [2000], [3000]]
epoch_times = [1, 2, 3, 4]
energy_usages = component.energy_usage(epoch_times)
Expand All @@ -121,7 +121,7 @@ def test_energy_usage_with_power_from_later_epoch(self):
)

def test_energy_usage_no_power(self):
component = Component(name="cpu", pids=[], devices_by_pid=False)
component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None)
component.power_usages = [[], [], [], [], []]
epoch_times = [1, 2, 3, 4, 5]
energy_usages = component.energy_usage(epoch_times)
Expand All @@ -132,7 +132,7 @@ def test_energy_usage_no_power(self):

def test_init(self):
handler_mock = MagicMock()
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = handler_mock
component.init()
handler_mock.init.assert_called_once()
Expand All @@ -144,15 +144,15 @@ def test_init(self):

def test_shutdown(self):
handler_mock = MagicMock()
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = handler_mock
component.shutdown()
handler_mock.shutdown.assert_called_once()

def test_create_components(self):
gpu = create_components("gpu", pids=[], devices_by_pid=False)
cpu = create_components("cpu", pids=[], devices_by_pid=False)
all_components = create_components("all", pids=[], devices_by_pid=False)
gpu = create_components("gpu", pids=[], devices_by_pid=False, logger=None)
cpu = create_components("cpu", pids=[], devices_by_pid=False, logger=None)
all_components = create_components("all", pids=[], devices_by_pid=False, logger=None)
self.assertEqual(len(gpu), 1)
self.assertEqual(len(cpu), 1)
self.assertEqual(len(all_components), 2)
Expand All @@ -166,12 +166,12 @@ def test_error_by_name(self):
)

def test_handler_property_with_handler_set(self):
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = "test"
self.assertEqual(component.handler, "test")

def test_handler_property_without_handler(self):
component = Component(name="gpu", pids=[], devices_by_pid=False)
component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None)
component._handler = None
with self.assertRaises(exceptions.GPUError):
component.handler()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_exceptions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_unit_error(self):

def test_intel_rapl_permission_error(self):
with self.assertRaises(exceptions.IntelRaplPermissionError):
raise exceptions.IntelRaplPermissionError
raise exceptions.IntelRaplPermissionError(file_names=["file1", "file2"])

def test_gpu_power_usage_retrieval_error(self):
with self.assertRaises(exceptions.GPUPowerUsageRetrievalError):
Expand Down

0 comments on commit e537a96

Please sign in to comment.