From e537a969898f21aad629e37e86d9046cfd4152b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rasmus=20Hag=20L=C3=B8vstad?= Date: Thu, 6 Feb 2025 14:16:05 +0100 Subject: [PATCH] Improved logging for RAPL permission issues and fixed tests --- carbontracker/components/component.py | 2 +- carbontracker/exceptions.py | 4 ++- tests/components/test_intel.py | 4 +-- tests/test_component.py | 44 +++++++++++++-------------- tests/test_exceptions.py | 2 +- 5 files changed, 29 insertions(+), 27 deletions(-) diff --git a/carbontracker/components/component.py b/carbontracker/components/component.py index 2b1a081..9f7d2d1 100644 --- a/carbontracker/components/component.py +++ b/carbontracker/components/component.py @@ -106,7 +106,7 @@ def collect_power_usage(self, epoch: int): # Only raise error if no measurements have been collected. if not self.power_usages[-1]: self.logger.err_critical( - f"Could not read CPU/DRAM energy consumption due to lack of read-permissions.\n\tPlease run the following command(s): \n\t\t{"\n\t\t".join(commands)}" + "Could not read CPU/DRAM energy consumption due to lack of read-permissions.\n\tPlease run the following command(s): \n\t\t" + "\n\t\t".join(commands) ) # Append zero measurement to avoid further errors. 
self.power_usages.append([0]) diff --git a/carbontracker/exceptions.py b/carbontracker/exceptions.py index 20cfce7..733510f 100644 --- a/carbontracker/exceptions.py +++ b/carbontracker/exceptions.py @@ -1,3 +1,5 @@ +from typing import List + class NoComponentsAvailableError(Exception): def __init__( self, @@ -23,7 +25,7 @@ def __init__(self, expected_unit, received_unit, message): class IntelRaplPermissionError(Exception): """Raised when an Intel RAPL permission error occurs.""" - def __init__(self, file_names: list[str]): + def __init__(self, file_names: List[str]): self.file_names = file_names diff --git a/tests/components/test_intel.py b/tests/components/test_intel.py index 9a48dd8..3ed56e0 100644 --- a/tests/components/test_intel.py +++ b/tests/components/test_intel.py @@ -12,7 +12,7 @@ def test_available(self, mock_listdir, mock_exists): mock_exists.return_value = True mock_listdir.return_value = ["some_directory"] - component = Component(name='cpu', pids=[], devices_by_pid={}) + component = Component(name='cpu', pids=[], devices_by_pid={}, logger=None) self.assertTrue(component.available()) @patch("os.path.exists") @@ -35,7 +35,7 @@ def test_available_false(self, mock_available, mock_listdir, mock_exists): mock_exists.return_value = False mock_listdir.return_value = [] - cpu = Component(name='cpu', pids=[], devices_by_pid={}) + cpu = Component(name='cpu', pids=[], devices_by_pid={}, logger=None) self.assertFalse(cpu.available()) @patch("time.sleep") diff --git a/tests/test_component.py b/tests/test_component.py index 92a1cb1..33f7f0d 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -24,23 +24,23 @@ class TestComponent(unittest.TestCase): def test_init_valid_component( self, mock_handlers_by_name, mock_error_by_name, mock_component_names ): - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) self.assertEqual(component.name, "gpu") 
self.assertEqual(component._handler, mock_handlers_by_name()[0]()) def test_init_invalid_component(self): with self.assertRaises(exceptions.ComponentNameError): - Component(name="unknown", pids=[], devices_by_pid=False) + Component(name="unknown", pids=[], devices_by_pid=False, logger=None) def test_devices(self): handler_mock = MagicMock(devices=MagicMock(return_value=["Test GPU"])) - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = handler_mock self.assertEqual(component.devices(), ["Test GPU"]) def test_available_true(self): handler_mock = MagicMock(available=MagicMock(return_value=True)) - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = handler_mock self.assertTrue(component.available()) @@ -52,33 +52,33 @@ def test_available_true(self): return_value=False, ) def test_available_false(self, mock_apple_gpu_available, mock_nvidia_gpu_available): - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) self.assertFalse(component.available()) def test_collect_power_usage_no_measurement(self): handler_mock = MagicMock( - power_usage=MagicMock(side_effect=exceptions.IntelRaplPermissionError) + power_usage=MagicMock(side_effect=exceptions.IntelRaplPermissionError(file_names=["file1", "file2"])) ) - component = Component(name="cpu", pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=MagicMock(err_critical=MagicMock())) component._handler = handler_mock component.collect_power_usage(epoch=1) self.assertEqual(component.power_usages, [[], [0]]) def test_collect_power_usage_with_measurement(self): handler_mock = MagicMock(power_usage=MagicMock(return_value=[1000])) - component = Component(name="cpu", 
pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) component._handler = handler_mock component.collect_power_usage(epoch=1) self.assertEqual(component.power_usages, [[1000]]) def test_collect_power_usage_with_measurement_but_no_epoch(self): - power_collector = Component(name="cpu", pids=[], devices_by_pid=False) + power_collector = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) power_collector._handler = MagicMock(power_usage=MagicMock(return_value=[1000])) power_collector.collect_power_usage(epoch=0) assert len(power_collector.power_usages) == 0 def test_collect_power_usage_with_previous_measurement(self): - power_collector = Component(name="cpu", pids=[], devices_by_pid=False) + power_collector = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) power_collector._handler = MagicMock(power_usage=MagicMock(return_value=[1000])) power_collector.collect_power_usage(epoch=1) power_collector.collect_power_usage(epoch=3) @@ -88,13 +88,13 @@ def test_collect_power_usage_GPUPowerUsageRetrievalError(self): handler_mock = MagicMock( power_usage=MagicMock(side_effect=exceptions.GPUPowerUsageRetrievalError) ) - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=MagicMock(err_critical=MagicMock())) component._handler = handler_mock component.collect_power_usage(epoch=1) self.assertEqual(component.power_usages, [[], [0]]) def test_energy_usage(self): - component = Component(name="cpu", pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) component.power_usages = [[1000], [2000], [3000]] epoch_times = [1, 2, 3] energy_usages = component.energy_usage(epoch_times) @@ -104,14 +104,14 @@ def test_energy_usage(self): self.assertTrue(np.all(np.array(energy_usages) > 0)) def test_energy_usage_no_measurements(self): - component = Component(name="cpu", 
pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) component.power_usages = [[]] epoch_times = [1] energy_usages = component.energy_usage(epoch_times) self.assertEqual(energy_usages, [0]) def test_energy_usage_with_power_from_later_epoch(self): - component = Component(name="cpu", pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) component.power_usages = [[1000], [2000], [3000]] epoch_times = [1, 2, 3, 4] energy_usages = component.energy_usage(epoch_times) @@ -121,7 +121,7 @@ def test_energy_usage_with_power_from_later_epoch(self): ) def test_energy_usage_no_power(self): - component = Component(name="cpu", pids=[], devices_by_pid=False) + component = Component(name="cpu", pids=[], devices_by_pid=False, logger=None) component.power_usages = [[], [], [], [], []] epoch_times = [1, 2, 3, 4, 5] energy_usages = component.energy_usage(epoch_times) @@ -132,7 +132,7 @@ def test_energy_usage_no_power(self): def test_init(self): handler_mock = MagicMock() - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = handler_mock component.init() handler_mock.init.assert_called_once() @@ -144,15 +144,15 @@ def test_init(self): def test_shutdown(self): handler_mock = MagicMock() - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = handler_mock component.shutdown() handler_mock.shutdown.assert_called_once() def test_create_components(self): - gpu = create_components("gpu", pids=[], devices_by_pid=False) - cpu = create_components("cpu", pids=[], devices_by_pid=False) - all_components = create_components("all", pids=[], devices_by_pid=False) + gpu = create_components("gpu", pids=[], devices_by_pid=False, logger=None) + cpu = 
create_components("cpu", pids=[], devices_by_pid=False, logger=None) + all_components = create_components("all", pids=[], devices_by_pid=False, logger=None) self.assertEqual(len(gpu), 1) self.assertEqual(len(cpu), 1) self.assertEqual(len(all_components), 2) @@ -166,12 +166,12 @@ def test_error_by_name(self): ) def test_handler_property_with_handler_set(self): - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = "test" self.assertEqual(component.handler, "test") def test_handler_property_without_handler(self): - component = Component(name="gpu", pids=[], devices_by_pid=False) + component = Component(name="gpu", pids=[], devices_by_pid=False, logger=None) component._handler = None with self.assertRaises(exceptions.GPUError): component.handler() diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index a4c0992..f75582a 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -12,7 +12,7 @@ def test_unit_error(self): def test_intel_rapl_permission_error(self): with self.assertRaises(exceptions.IntelRaplPermissionError): - raise exceptions.IntelRaplPermissionError + raise exceptions.IntelRaplPermissionError(file_names=["file1", "file2"]) def test_gpu_power_usage_retrieval_error(self): with self.assertRaises(exceptions.GPUPowerUsageRetrievalError):