Skip to content

Commit

Permalink
add support for project_number and config-file arguments in output-par…
Browse files Browse the repository at this point in the history
…sers
  • Loading branch information
gargnitingoogle committed Aug 5, 2024
1 parent 12049a5 commit d5c20fe
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 9 deletions.
31 changes: 29 additions & 2 deletions perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json, os, pprint, subprocess
import sys
import dlio_workload
Expand Down Expand Up @@ -64,6 +65,26 @@ def downloadDlioOutputs(dlioWorkloads):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="DLIO Unet3d test output parser",
description=(
"This program takes in a json test-config file and parses it for"
" output buckets. From each output bucket, it downloads all the dlio"
" output logs from gs://<bucket>/logs/ localy to"
f" {LOCAL_LOGS_LOCATION} and parses them for dlio test runs and their"
" output metrics."
),
)
parser.add_argument("--workload-config")
parser.add_argument(
"--project-number",
help=(
"project-number (e.g. 93817472919) is needed to fetch the cpu/memory"
" utilization data from GCP."
),
)
args = parser.parse_args()

try:
os.makedirs(LOCAL_LOGS_LOCATION)
except FileExistsError:
Expand Down Expand Up @@ -155,10 +176,16 @@ def downloadDlioOutputs(dlioWorkloads):
r["end"] = standard_timestamp(per_epoch_stats_data[str(i + 1)]["end"])
if r["scenario"] != "local-ssd" and mash_installed:
r["lowest_memory"], r["highest_memory"] = get_memory(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
r["lowest_cpu"], r["highest_cpu"] = get_cpu(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
pass

Expand Down
33 changes: 30 additions & 3 deletions perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def downloadFioOutputs(fioWorkloads):
os.makedirs(LOCAL_LOGS_LOCATION + "/" + fioWorkload.fileSize)
except FileExistsError:
pass
print(f"Download FIO output from the folder {fioWorkload.bucket}...")

print(f"Downloading FIO outputs from {fioWorkload.bucket}...")
result = subprocess.run(
[
"gsutil",
Expand All @@ -71,6 +72,26 @@ def downloadFioOutputs(fioWorkloads):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="DLIO Unet3d test output parser",
description=(
"This program takes in a json test-config file and parses it for"
" output buckets.From each output bucket, it downloads all the dlio"
" output logs from gs://<bucket>/logs/ locally to"
f" {LOCAL_LOGS_LOCATION} and parses them for dlio test runs and their"
" output metrics."
),
)
parser.add_argument("--workload-config")
parser.add_argument(
"--project-number",
help=(
"project-number (93817472919) is needed to fetch the cpu/memory"
" utilization data from GCP."
),
)
args = parser.parse_args()

try:
os.makedirs(LOCAL_LOGS_LOCATION)
except FileExistsError:
Expand Down Expand Up @@ -161,10 +182,16 @@ def downloadFioOutputs(fioWorkloads):
r["end"] = unix_to_timestamp(per_epoch_output_data["timestamp_ms"])
if r["scenario"] != "local-ssd" and mash_installed:
r["lowest_memory"], r["highest_memory"] = get_memory(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
r["lowest_cpu"], r["highest_cpu"] = get_cpu(
r["pod_name"], r["start"], r["end"]
r["pod_name"],
r["start"],
r["end"],
project_number=args.project_number,
)
pass
r["gcsfuse_mount_options"] = gcsfuse_mount_options
Expand Down
8 changes: 4 additions & 4 deletions perfmetrics/scripts/testing_on_gke/examples/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def is_mash_installed() -> bool:
except subprocess.CalledProcessError:
return False

def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:
def get_memory(pod_name: str, start: str, end: str, project_number: int) -> Tuple[int, int]:
# for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '927584127901', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '{project_number}', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
capture_output=True, text=True)

data_points_int = []
Expand All @@ -48,10 +48,10 @@ def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:

return int(min(data_points_int) / 1024 ** 2) , int(max(data_points_int) / 1024 ** 2)

def get_cpu(pod_name: str, start: str, end: str) -> Tuple[float, float]:
def get_cpu(pod_name: str, start: str, end: str, project_number: int) -> Tuple[float, float]:
# for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '927584127901'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '{project_number}'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
capture_output=True, text=True)

data_points_float = []
Expand Down

0 comments on commit d5c20fe

Please sign in to comment.