Skip to content

Commit

Permalink
add option to remove iperf servers
Browse files Browse the repository at this point in the history
Signed-off-by: vezio <tyler.rimaldi@ibm.com>
  • Loading branch information
vezio committed Aug 25, 2024
1 parent 5d8dcaa commit 2d670df
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
70 changes: 70 additions & 0 deletions autopilot-daemon/network/iperf3_stop_servers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from iperf3_utils import *

import subprocess
import os
import sys
import signal


def kill_all_iperf_servers():
    """Send SIGTERM to every ``iperf3 -s`` process listed by ``ps aux``.

    The process table is captured once with ``ps aux``; each line whose text
    contains ``iperf3 -s`` is treated as an iperf3 server, and its PID is
    read from the second whitespace-separated column.  Processes with a
    PID of 1 or lower are never signalled (see the comment in the loop).

    A server that has already exited by the time it is signalled is logged
    and skipped, because it is already in the desired state and aborting
    would leave the remaining servers running.

    Exits the interpreter with status 1 (``sys.exit(1)``) when:
      * ``ps`` cannot be executed or returns a non-zero status,
      * a matching line cannot be parsed into a PID,
      * the signal cannot be delivered for any reason other than the
        target having already exited (e.g. permission denied).
    """
    try:
        result = subprocess.run(
            ["ps", "aux"], text=True, capture_output=True, check=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing/non-executable `ps` binary, which would
        # otherwise surface as an unhandled traceback.
        log.error(f"Error occurred while listing processes: {e}")
        sys.exit(1)

    location = f"{CURR_POD_NAME} on {CURR_WORKER_NODE_NAME}"

    for process in result.stdout.splitlines():
        if "iperf3 -s" not in process:
            continue

        try:
            parts = process.split()
            if len(parts) <= 1:
                log.error(
                    f"Unexpected format in process line: {process} in {location}"
                )
                sys.exit(1)

            pid = int(parts[1])

            # Not killing default iperf server spun up on entrypoint...
            if pid <= 1:
                log.info(
                    f"Nothing left to kill in {location} (Not killing default entrypoint iperf3 server)."
                )
                continue

            log.info(f"Killing iperf3 server process (PID: {pid}) in {location}")
            try:
                os.kill(pid, signal.SIGTERM)
            except ProcessLookupError:
                # The process exited between the `ps` snapshot and the
                # signal; that is the outcome we wanted, so keep going.
                log.info(
                    f"Process with PID {pid} no longer exists in {location}; nothing to kill."
                )
            except PermissionError:
                log.error(
                    f"Permission denied: Could not kill process with PID {pid} in {location}."
                )
                sys.exit(1)
            except Exception as e:
                log.error(
                    f"Failed to kill process with PID {pid}: {e} in {location}"
                )
                sys.exit(1)
        except ValueError:
            log.error(
                f"Could not convert PID to an integer: {process} in {location}"
            )
            sys.exit(1)
        except Exception as e:
            # SystemExit is not an Exception subclass, so the sys.exit(1)
            # calls above propagate past this handler untouched.
            log.error(f"An unexpected error occurred: {e} in {location}")
            sys.exit(1)


# Script entry point: stop the iperf3 servers only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    kill_all_iperf_servers()
4 changes: 4 additions & 0 deletions autopilot-daemon/network/network_workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def __init__(self, namespace=None, workload_name="Ring Topology"):
exit(1)

def gen_autopilot_node_map_json(self):
#
# TODO: This lists every endpoint, so a failing worker that has no pods
# never appears in the result and is silently skipped. That is a problem.
#
try:
endpoints = self.v1.list_namespaced_endpoints(
self.namespace,
Expand Down

0 comments on commit 2d670df

Please sign in to comment.