From db6fcac30ee7bd8e6f5bdc9c02d6d411736137fd Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Tue, 26 Dec 2023 07:42:19 +0100 Subject: [PATCH 1/6] First version of monitor mode with only CPU utilization provider --- .../cpu/utilization/cgroup/container/source.c | 120 ++++++++-- monitor.py | 222 ++++++++++++++++++ runner.py | 6 +- 3 files changed, 322 insertions(+), 26 deletions(-) create mode 100644 monitor.py diff --git a/metric_providers/cpu/utilization/cgroup/container/source.c b/metric_providers/cpu/utilization/cgroup/container/source.c index f83c67a57..05738f6f5 100644 --- a/metric_providers/cpu/utilization/cgroup/container/source.c +++ b/metric_providers/cpu/utilization/cgroup/container/source.c @@ -6,10 +6,12 @@ #include #include // for strtok #include +#include typedef struct container_t { // struct is a specification and this static makes no sense here char path[BUFSIZ]; char *id; + int active; } container_t; // All variables are made static, because we believe that this will @@ -40,27 +42,50 @@ static long int read_cpu_cgroup(FILE *fd) { return cpu_usage; } -static long int get_cpu_stat(char* filename, int mode) { - FILE* fd = NULL; - long int result=-1; +static int scan_directory(container_t** containers, int rootless_mode) { + struct dirent* entry; + size_t docker_prefix_len = strlen("docker-"); + size_t scope_suffix_len = strlen(".scope"); + int length = 0; + DIR* dir = NULL; + char my_path[BUFSIZ] = ""; - fd = fopen(filename, "r"); - if ( fd == NULL) { - fprintf(stderr, "Error - Could not open path for reading: %s. Maybe the container is not running anymore? Are you using --rootless mode? Errno: %d\n", filename, errno); - exit(1); - } - if(mode == 1) { - result = read_cpu_cgroup(fd); - // printf("Got cgroup: %ld", result); + if(rootless_mode) { + sprintf(my_path, "/sys/fs/cgroup/user.slice/user-%d.slice/user@%d.service/user.slice/", user_id, user_id); } else { - result = read_cpu_proc(fd); - // printf("Got /proc/stat: %ld", result); + sprintf(my_path, "/sys/fs/cgroup/system.slice/"); } - fclose(fd); - return result; -} + dir = opendir(my_path); + if (!dir) { + fprintf(stderr,"Unable to scan directory for containers. Could not find folder: %s\n", my_path); + exit(-1); + } + + *containers = malloc(sizeof(container_t)); + + while ((entry = readdir(dir)) != NULL) { + // Check if the entry is a directory and matches the format + if (entry->d_type == DT_DIR && + strstr(entry->d_name, "docker-") == entry->d_name && + strstr(entry->d_name + docker_prefix_len, ".scope") != NULL && + strcmp(entry->d_name + strlen(entry->d_name) - scope_suffix_len, ".scope") == 0) { + + length++; + *containers = realloc(*containers, length * sizeof(container_t)); + (*containers)[length-1].id = strdup(entry->d_name); + (*containers)[length-1].active = 1; + sprintf((*containers)[length-1].path, + "/sys/fs/cgroup/user.slice/user-%d.slice/user@%d.service/user.slice/%s/cpu.stat", + user_id, user_id, entry->d_name); + } + } + printf("Found new length: %d\n", length); + + closedir(dir); + return length; +} static int output_stats(container_t* containers, int length) { @@ -69,28 +94,58 @@ static int output_stats(container_t* containers, int length) { long int cpu_readings_after[length]; long int container_reading; + FILE *fd = NULL; + struct timeval now; int i; - // Get Energy Readings, set timestamp mark gettimeofday(&now, NULL); + for(i=0; i Date: Tue, 26 Dec 2023 08:30:12 +0100 Subject: [PATCH 2/6] Added KeyboardInterrupt; C-Code is now silent; Added monitor to base.py --- metric_providers/base.py | 15 +++-- .../utilization/cgroup/container/provider.py | 5 +- .../cpu/utilization/cgroup/container/source.c | 7 ++- monitor.py | 55 +++++++++---------- runner.py | 11 ++-- 5 files changed, 51 insertions(+), 42 deletions(-) diff --git a/metric_providers/base.py b/metric_providers/base.py index d90f912f2..9ba3d23b9 100644 --- a/metric_providers/base.py +++ b/metric_providers/base.py @@ -22,7 +22,10 @@ def __init__( current_dir, metric_provider_executable='metric-provider-binary', sudo=False, - disable_buffer=True + disable_buffer=True, + rootless = None, + monitor=False, + ): self._metric_name = metric_name self._metrics = metrics @@ -33,7 +36,8 @@ def __init__( self._sudo = sudo self._has_started = False self._disable_buffer = disable_buffer - self._rootless = None + self._rootless = rootless + self._monitor = monitor self._tmp_folder = '/tmp/green-metrics-tool' self._ps = None @@ -112,14 +116,17 @@ def start_profiling(self, containers=None): call_string += ' ' # space at start call_string += ' '.join(self._extra_switches) - # This needs refactoring see https://github.com/green-coding-berlin/green-metrics-tool/issues/45 - if self._metrics.get('container_id') is not None: + if self._monitor is True: + call_string += ' --monitor ' + elif self._metrics.get('container_id') is not None: # This needs refactoring see https://github.com/green-coding-berlin/green-metrics-tool/issues/45 call_string += ' -s ' call_string += ','.join(containers.keys()) if self._rootless is True: call_string += ' --rootless ' + + call_string += f" > {self._filename}" if self._disable_buffer: diff --git a/metric_providers/cpu/utilization/cgroup/container/provider.py b/metric_providers/cpu/utilization/cgroup/container/provider.py index 986d36701..b6f340c91 100644 --- a/metric_providers/cpu/utilization/cgroup/container/provider.py +++ b/metric_providers/cpu/utilization/cgroup/container/provider.py @@ -3,12 +3,13 @@ from metric_providers.base import BaseMetricProvider class CpuUtilizationCgroupContainerProvider(BaseMetricProvider): - def __init__(self, resolution, rootless=False): + def __init__(self, resolution, rootless=False, monitor=False): super().__init__( metric_name='cpu_utilization_cgroup_container', metrics={'time': int, 'value': int, 'container_id': str}, resolution=resolution, unit='Ratio', current_dir=os.path.dirname(os.path.abspath(__file__)), + rootless=rootless, + monitor=monitor, ) - self._rootless = rootless diff --git a/metric_providers/cpu/utilization/cgroup/container/source.c b/metric_providers/cpu/utilization/cgroup/container/source.c index 05738f6f5..d3f5c9200 100644 --- a/metric_providers/cpu/utilization/cgroup/container/source.c +++ b/metric_providers/cpu/utilization/cgroup/container/source.c @@ -81,7 +81,7 @@ static int scan_directory(container_t** containers, int rootless_mode) { user_id, user_id, entry->d_name); } } - printf("Found new length: %d\n", length); + // printf("Found new length: %d\n", length); closedir(dir); return length; @@ -106,7 +106,7 @@ static int output_stats(container_t* containers, int length) { //printf("Looking at %s ", containers[i].path); fd = fopen(containers[i].path, "r"); if (fd == NULL) { - printf("Warning, container has disappeared in 'before': %s\n", containers[i].path); + //printf("Warning, container has disappeared in 'before': %s\n", containers[i].path); containers[i].active = 0; continue; } @@ -129,7 +129,7 @@ static int output_stats(container_t* containers, int length) { fd = fopen(containers[i].path, "r"); if (fd == NULL) { - printf("Warning, container has disappeared in 'after': %s\n", containers[i].path); + //printf("Warning, container has disappeared in 'after': %s\n", containers[i].path); containers[i].active = 0; continue; } @@ -230,6 +230,7 @@ int main(int argc, char **argv) { {"help", no_argument, NULL, 'h'}, {"interval", no_argument, NULL, 'i'}, {"containers", no_argument, NULL, 's'}, + {"monitor", no_argument, NULL, 'm'}, {NULL, 0, NULL, 0} }; diff --git a/monitor.py b/monitor.py index ab3c4876d..d6accd61b 100644 --- a/monitor.py +++ b/monitor.py @@ -10,6 +10,7 @@ import subprocess import os import sys +import time from pathlib import Path @@ -54,7 +55,7 @@ def monitor(self): # self.checkout_repository() self.initialize_run() #self.initial_parse() - self.import_metric_providers() + self.import_metric_providers(monitor=True) #self.populate_image_names() self.prepare_docker() # self.check_running_containers() @@ -75,22 +76,12 @@ def monitor(self): self.start_metric_providers(allow_container=True, allow_other=False) - self.start_phase('MOCK_BASELINE', transition=False, silent=True) - self.end_phase('MOCK_BASELINE') - - self.start_phase('MOCK_INSTALLATION', transition=False, silent=True) - self.end_phase('MOCK_INSTALLATION') - - self.start_phase('MOCK_BOOT', transition=False, silent=True) - self.end_phase('MOCK_BOOT') - - self.start_phase('MOCK_IDLE', transition=False, silent=True) - self.end_phase('MOCK_IDLE') - - - self.start_phase('[RUNTIME]') + self.start_phase('[RUNTIME]', transition=False) # TODO: Trigger - self.custom_sleep(2) + + print('Monitoring active ... press CTRL+C to stop and save data.') + while True: + time.sleep(3600) @@ -98,42 +89,46 @@ def monitor(self): self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: - self.end_phase('[RUNTIME]') - self.end_measurement() - self.store_phases() - self.update_start_and_end_times() - try: - self.read_container_logs() + self.end_phase('[RUNTIME]') + self.end_measurement() + self.store_phases() + self.update_start_and_end_times() except BaseException as exc: self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: try: - self.read_and_cleanup_processes() + self.read_container_logs() except BaseException as exc: self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: try: - self.save_notes_runner() + self.read_and_cleanup_processes() except BaseException as exc: self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: try: - self.stop_metric_providers() + self.save_notes_runner() except BaseException as exc: self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: try: - self.save_stdout_logs() + self.stop_metric_providers() except BaseException as exc: self.add_to_log(exc.__class__.__name__, str(exc)) raise exc finally: - self.cleanup() # always run cleanup automatically after each run + try: + self.save_stdout_logs() + except BaseException as exc: + self.add_to_log(exc.__class__.__name__, str(exc)) + raise exc + finally: + self.cleanup() # always run cleanup automatically after each run return self._run_id @@ -199,13 +194,15 @@ def monitor(self): from tools.phase_stats import build_and_store_phase_stats print("Run id is", monitor._run_id) - build_and_store_phase_stats(monitor._run_id, monitor._sci) + except KeyboardInterrupt: + from tools.phase_stats import build_and_store_phase_stats + print("Aggregating and uploading phase_stats. This can take a while for longer runs ...") + build_and_store_phase_stats(monitor._run_id, monitor._sci) print(TerminalColors.OKGREEN,'\n\n####################################################################################') print(f"Please access your report on the URL {GlobalConfig().config['cluster']['metrics_url']}/stats.html?id={monitor._run_id}") print('####################################################################################\n\n', TerminalColors.ENDC) - except FileNotFoundError as e: error_helpers.log_error('File or executable not found', e, monitor._run_id) except subprocess.CalledProcessError as e: diff --git a/runner.py b/runner.py index 2a4c9d663..6d121ca32 100755 --- a/runner.py +++ b/runner.py @@ -476,7 +476,7 @@ def update_and_insert_specs(self): self._run_id) ) - def import_metric_providers(self): + def import_metric_providers(self, monitor=False): config = GlobalConfig().config print(TerminalColors.HEADER, '\nImporting metric providers', TerminalColors.ENDC) @@ -500,6 +500,9 @@ def import_metric_providers(self): if rootless and '.cgroup.' in module_path: conf['rootless'] = True + if monitor and '.cgroup.' in module_path: + conf['monitor'] = True + print(f"Importing {class_name} from {module_path}") print(f"Configuration is {conf}") @@ -966,11 +969,10 @@ def start_metric_providers(self, allow_container=True, allow_other=True): raise RuntimeError(f"Stderr on {metric_provider.__class__.__name__} was NOT empty: {stderr_read}") - def start_phase(self, phase, transition=True, silent=False): + def start_phase(self, phase, transition=True): config = GlobalConfig().config - if not silent: - print(TerminalColors.HEADER, f"\nStarting phase {phase}.", TerminalColors.ENDC) + print(TerminalColors.HEADER, f"\nStarting phase {phase}.", TerminalColors.ENDC) if transition: # The force-sleep must go and we must actually check for the temperature baseline @@ -1499,6 +1501,7 @@ def run(self): from tools.phase_stats import build_and_store_phase_stats print("Run id is", runner._run_id) + print("Aggregating and uploading phase_stats. This can take a while for longer runs ...") build_and_store_phase_stats(runner._run_id, runner._sci) From 8ef1ac239e66d1d4c2969f99c4521f97c7f6d3ad Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Wed, 15 May 2024 17:34:10 +0200 Subject: [PATCH 3/6] Updated XGBoost --- metric_providers/psu/energy/ac/xgboost/machine/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_providers/psu/energy/ac/xgboost/machine/model b/metric_providers/psu/energy/ac/xgboost/machine/model index 16f45ee04..50ab433cf 160000 --- a/metric_providers/psu/energy/ac/xgboost/machine/model +++ b/metric_providers/psu/energy/ac/xgboost/machine/model @@ -1 +1 @@ -Subproject commit 16f45ee04d57442544422097179e20fb0a420665 +Subproject commit 50ab433cf65bfe600d5a7e034677f0945b5d8841 From 67470c110d3e89b509d17621272aaa3565a2a547 Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Wed, 15 May 2024 18:00:35 +0200 Subject: [PATCH 4/6] Monitor Mode ready for new GMT version --- api/api_helpers.py | 8 +- docker/structure.sql | 1 + .../cpu/utilization/cgroup/container/source.c | 28 +++-- migrations/2024_05_15_monitor_mode.sql | 1 + monitor.py | 109 ++++++++---------- optimization_providers/base.py | 2 +- optimization_providers/durations/container.py | 19 +-- .../resources/utilization.py | 10 +- runner.py | 74 ++++++------ 9 files changed, 127 insertions(+), 125 deletions(-) create mode 100644 migrations/2024_05_15_monitor_mode.sql diff --git a/api/api_helpers.py b/api/api_helpers.py index c8d5848b2..6d97ea62e 100644 --- a/api/api_helpers.py +++ b/api/api_helpers.py @@ -331,7 +331,7 @@ def get_phase_stats(ids): query = """ SELECT a.phase, a.metric, a.detail_name, a.value, a.type, a.max_value, a.min_value, a.unit, - b.uri, c.description, b.filename, b.commit_hash, b.branch + b.uri, c.description, b.filename, b.commit_hash, b.branch, b.monitor_run FROM phase_stats as a LEFT JOIN runs as b on b.id = a.run_id LEFT JOIN machines as c on c.id = b.machine_id @@ -452,12 +452,14 @@ def get_phase_stats_object(phase_stats, case): for phase_stat in phase_stats: [ phase, metric_name, detail_name, value, metric_type, max_value, min_value, unit, - repo, machine_description, filename, commit_hash, branch + repo, machine_description, filename, commit_hash, branch, monitor_run ] = phase_stat # unpack phase = phase.split('_', maxsplit=1)[1] # remove the 001_ prepended stuff again, which is only for ordering - if case == 'Repository': + if monitor_run: + key = 'monitor' + elif case == 'Repository': key = repo # Case D : RequirementsEngineering Case elif case == 'Branch': key = branch # Case C_3 : SoftwareDeveloper Case diff --git a/docker/structure.sql b/docker/structure.sql index c8dedcbff..91d8072b7 100644 --- a/docker/structure.sql +++ b/docker/structure.sql @@ -67,6 +67,7 @@ CREATE TABLE runs ( logs text, invalid_run text, failed boolean DEFAULT false, + monitor_run boolean DEFAULT false, created_at timestamp with time zone DEFAULT now(), updated_at timestamp with time zone ); diff --git a/metric_providers/cpu/utilization/cgroup/container/source.c b/metric_providers/cpu/utilization/cgroup/container/source.c index 82b8f56cf..95c5b1680 100644 --- a/metric_providers/cpu/utilization/cgroup/container/source.c +++ b/metric_providers/cpu/utilization/cgroup/container/source.c @@ -64,6 +64,7 @@ static int scan_directory(container_t** containers, int rootless_mode) { } *containers = malloc(sizeof(container_t)); + //printf("old length: %d\n", length); while ((entry = readdir(dir)) != NULL) { // Check if the entry is a directory and matches the format @@ -71,17 +72,23 @@ static int scan_directory(container_t** containers, int rootless_mode) { strstr(entry->d_name, "docker-") == entry->d_name && strstr(entry->d_name + docker_prefix_len, ".scope") != NULL && strcmp(entry->d_name + strlen(entry->d_name) - scope_suffix_len, ".scope") == 0) { - + // printf("Entry %s\n", entry->d_name); length++; *containers = realloc(*containers, length * sizeof(container_t)); (*containers)[length-1].id = strdup(entry->d_name); (*containers)[length-1].active = 1; - sprintf((*containers)[length-1].path, - "/sys/fs/cgroup/user.slice/user-%d.slice/user@%d.service/user.slice/%s/cpu.stat", - user_id, user_id, entry->d_name); + if(rootless_mode) { + sprintf((*containers)[length-1].path, + "/sys/fs/cgroup/user.slice/user-%d.slice/user@%d.service/user.slice/%s/cpu.stat", + user_id, user_id, entry->d_name); + } else { + sprintf((*containers)[length-1].path, + "/sys/fs/cgroup/system.slice/%s/cpu.stat", + entry->d_name); + } } } - // printf("Found new length: %d\n", length); + //printf("Found new length: %d\n", length); closedir(dir); return length; @@ -103,10 +110,10 @@ static int output_stats(container_t* containers, int length) { gettimeofday(&now, NULL); for(i=0; i Date: Wed, 15 May 2024 18:15:30 +0200 Subject: [PATCH 5/6] New row excluded from diffing --- tests/lib/test_diff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/lib/test_diff.py b/tests/lib/test_diff.py index 86b27ce9c..fd3b9e9b4 100644 --- a/tests/lib/test_diff.py +++ b/tests/lib/test_diff.py @@ -8,7 +8,10 @@ # For the diffing to work as expected it is important that we include a known set of columns # It might happen that at some point a dev adds a column to the table, but forgets to also add it -# to the diffing. To prevent this, this Unit test checks if the table column signature is unchanged +# to the diffing. +# To prevent this, this Unit test checks if the table column signature is unchanged +# +# If this test fails and an additional column should be diffed please add it to the file lib.diff.py::get_diffable_row() def test_run_signature(): expected_signature = 'id,job_id,name,uri,branch,commit_hash,commit_timestamp,email,categories,usage_scenario,filename,machine_specs,runner_arguments,machine_id,gmt_hash,measurement_config,start_measurement,end_measurement,phases,logs,invalid_run,failed,created_at,updated_at' From 56908ddba1234f4f408f9942cff1fb3be27963f0 Mon Sep 17 00:00:00 2001 From: Arne Tarara Date: Wed, 15 May 2024 18:26:38 +0200 Subject: [PATCH 6/6] Added column to diffing --- tests/lib/test_diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lib/test_diff.py b/tests/lib/test_diff.py index fd3b9e9b4..904a18f43 100644 --- a/tests/lib/test_diff.py +++ b/tests/lib/test_diff.py @@ -14,7 +14,7 @@ # If this test fails and an additional column should be diffed please add it to the file lib.diff.py::get_diffable_row() def test_run_signature(): - expected_signature = 'id,job_id,name,uri,branch,commit_hash,commit_timestamp,email,categories,usage_scenario,filename,machine_specs,runner_arguments,machine_id,gmt_hash,measurement_config,start_measurement,end_measurement,phases,logs,invalid_run,failed,created_at,updated_at' + expected_signature = 'id,job_id,name,uri,branch,commit_hash,commit_timestamp,email,categories,usage_scenario,filename,machine_specs,runner_arguments,machine_id,gmt_hash,measurement_config,start_measurement,end_measurement,phases,logs,invalid_run,failed,monitor_run,created_at,updated_at' current_signature = DB().fetch_all("SELECT column_name FROM information_schema.columns WHERE table_name = 'runs' ORDER BY ordinal_position;") current_signature = ",".join([x[0] for x in current_signature])