Skip to content

Commit

Permalink
add support for project_number and config-file arguments in output-par…
Browse files Browse the repository at this point in the history
…sers
  • Loading branch information
gargnitingoogle committed Aug 5, 2024
1 parent 12049a5 commit d5c20fe
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 9 deletions.
31 changes: 29 additions & 2 deletions perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json, os, pprint, subprocess
import sys
import dlio_workload
Expand Down Expand Up @@ -64,6 +65,26 @@ def downloadDlioOutputs(dlioWorkloads):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="DLIO Unet3d test output parser",
description=(
"This program takes in a json test-config file and parses it for"
" output buckets. From each output bucket, it downloads all the dlio"
" output logs from gs://<bucket>/logs/ localy to"
f" {LOCAL_LOGS_LOCATION} and parses them for dlio test runs and their"
" output metrics."
),
)
parser.add_argument("--workload-config")
parser.add_argument(
"--project-number",
help=(
"project-number (e.g. 93817472919) is needed to fetch the cpu/memory"
" utilization data from GCP."
),
)
args = parser.parse_args()

try:
os.makedirs(LOCAL_LOGS_LOCATION)
except FileExistsError:
Expand Down Expand Up @@ -155,10 +176,16 @@ def downloadDlioOutputs(dlioWorkloads):
r["end"] = standard_timestamp(per_epoch_stats_data[str(i + 1)]["end"])
if r["scenario"] != "local-ssd" and mash_installed:
r["lowest_memory"], r["highest_memory"] = get_memory(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
r["lowest_cpu"], r["highest_cpu"] = get_cpu(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
pass

Expand Down
33 changes: 30 additions & 3 deletions perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def downloadFioOutputs(fioWorkloads):
os.makedirs(LOCAL_LOGS_LOCATION + "/" + fioWorkload.fileSize)
except FileExistsError:
pass
print(f"Download FIO output from the folder {fioWorkload.bucket}...")

print(f"Downloading FIO outputs from {fioWorkload.bucket}...")
result = subprocess.run(
[
"gsutil",
Expand All @@ -71,6 +72,26 @@ def downloadFioOutputs(fioWorkloads):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="DLIO Unet3d test output parser",
description=(
"This program takes in a json test-config file and parses it for"
" output buckets.From each output bucket, it downloads all the dlio"
" output logs from gs://<bucket>/logs/ locally to"
f" {LOCAL_LOGS_LOCATION} and parses them for dlio test runs and their"
" output metrics."
),
)
parser.add_argument("--workload-config")
parser.add_argument(
"--project-number",
help=(
"project-number (93817472919) is needed to fetch the cpu/memory"
" utilization data from GCP."
),
)
args = parser.parse_args()

try:
os.makedirs(LOCAL_LOGS_LOCATION)
except FileExistsError:
Expand Down Expand Up @@ -161,10 +182,16 @@ def downloadFioOutputs(fioWorkloads):
r["end"] = unix_to_timestamp(per_epoch_output_data["timestamp_ms"])
if r["scenario"] != "local-ssd" and mash_installed:
r["lowest_memory"], r["highest_memory"] = get_memory(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
r["lowest_cpu"], r["highest_cpu"] = get_cpu(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
pass
r["gcsfuse_mount_options"] = gcsfuse_mount_options
Expand Down
8 changes: 4 additions & 4 deletions perfmetrics/scripts/testing_on_gke/examples/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def is_mash_installed() -> bool:
except subprocess.CalledProcessError:
return False

def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:
def get_memory(pod_name: str, start: str, end: str, project_number: int) -> Tuple[int, int]:
# for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '927584127901', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '{project_number}', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
capture_output=True, text=True)

data_points_int = []
Expand All @@ -48,10 +48,10 @@ def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:

return int(min(data_points_int) / 1024 ** 2) , int(max(data_points_int) / 1024 ** 2)

def get_cpu(pod_name: str, start: str, end: str) -> Tuple[float, float]:
def get_cpu(pod_name: str, start: str, end: str, project_number: int) -> Tuple[float, float]:
# for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '927584127901'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '{project_number}'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
capture_output=True, text=True)

data_points_float = []
Expand Down

0 comments on commit d5c20fe

Please sign in to comment.