version 1.9 public release
chengniansun committed Jan 10, 2025
1 parent f406c05 commit fa3f75a
Showing 186 changed files with 101,662 additions and 0 deletions.
Empty file added MODULE.bazel
79 changes: 79 additions & 0 deletions MODULE.bazel.kept_for_future
@@ -0,0 +1,79 @@
module(
    name = "your_project_name",
    version = "1.0.0",
)

# Rules for Maven JVM dependencies
bazel_dep(name = "rules_jvm_external", version = "6.3")

# Load the Maven extension
maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven")

# Declare Maven artifacts.
maven.install(
    artifacts = [
        "com.beust:jcommander:1.82",
        "com.fasterxml.jackson.core:jackson-core:2.18.2",
        "com.fasterxml.jackson.core:jackson-databind:2.18.2",
        "com.fasterxml.jackson.core:jackson-annotations:2.18.2",
        "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.2",
        "com.fasterxml.jackson.datatype:jackson-datatype-guava:2.18.2",
        "com.fasterxml.jackson.module:jackson-module-kotlin:2.18.2",
        "com.google.errorprone:error_prone_annotations:2.3.4",
        "com.google.flogger:flogger-system-backend:0.8",
        "com.google.flogger:flogger:0.8",
        "com.google.googlejavaformat:google-java-format:1.15.0",
        "com.google.guava:guava:32.0.0-jre",
        "com.google.truth:truth:1.1.4",
        "com.googlecode.java-diff-utils:diffutils:1.3.0",
        "com.guardsquare:proguard-base:7.2.1",
        "com.github.gumtreediff:core:3.0.0",
        "com.pinterest:ktlint:0.50.0",
        "io.gitlab.arturbosch.detekt:detekt-cli:1.23.1",
        "io.netty:netty-all:4.1.66.Final",
        "it.unimi.dsi:fastutil:8.5.12",
        "me.lemire.integercompression:JavaFastPFOR:0.1.9",
        "org.antlr:antlr4-runtime:4.13.2",
        "org.antlr:antlr4:4.13.2",
        "org.apache.commons:commons-csv:1.10.0",
        "org.apache.commons:commons-exec:1.4.0",
        "org.apache.commons:commons-lang3:3.9",
        "org.apache.commons:commons-text:1.9",
        "org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.9.10",
        "org.jetbrains.kotlin:kotlin-reflect:1.9.10",
        "org.jfree:jfreechart:1.5.0",
        "org.jgrapht:jgrapht-core:1.3.0",
        "org.ow2.asm:asm:9.3",
        "org.ow2.asm:asm-commons:9.3",
        "org.ow2.asm:asm-util:9.3",
        "junit:junit:4.13.2",
        "org.commonmark:commonmark:0.18.1",
    ],
    fetch_sources = True,
    repositories = [
        "https://jcenter.bintray.com",
        "https://maven.google.com",
        "https://repo1.maven.org/maven2",
    ],
)

# Bring the @maven repository generated by the extension into scope.
use_repo(maven, "maven")
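# Targets can then depend on the pinned artifacts via @maven labels,
# e.g. deps = ["@maven//:com_google_guava_guava"] in a java_library rule.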

# Rules for Kotlin
bazel_dep(name = "rules_kotlin", version = "2.0.0")

# Rules for Go
bazel_dep(name = "rules_go", version = "0.45.1")

# Rules for Gazelle (for Go)
bazel_dep(name = "gazelle", version = "0.34.0")

# Rules for Protobuf
bazel_dep(name = "protobuf", version = "29.0")
bazel_dep(name = "rules_proto", version = "7.0.2")

# Rules for Buildtools
bazel_dep(name = "buildifier_prebuilt", version = "7.3.1")

register_toolchains("//:kotlin_toolchain")
110 changes: 110 additions & 0 deletions MODULE.bazel.lock

Some generated files are not rendered by default.

188 changes: 188 additions & 0 deletions benchmark/convert_memory_log_to_csv.py
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
import os
import json
import argparse
from typing import List, Final, Tuple, Dict

__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))


def parse_arguments():
    parser = argparse.ArgumentParser(
        description="Summarize benchmark results (in bytes). "
                    "This extracts the data from the log and JSON files. "
                    "The output file can be further analyzed by analyze_csv_memory.py")
    parser.add_argument("folders", nargs='+', default=[],
                        help="Folders of JSON and log files; "
                             "the JSON and log files should come in pairs")
    parser.add_argument("-o", type=str, dest="output_csv", required=True, help="output CSV file")

    return parser.parse_args()
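
# Example invocation (hypothetical paths):
#   ./convert_memory_log_to_csv.py results/run1 results/run2 -o memory_summary.csv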


def extract_entries(lines: List[str]) -> Tuple[List[int], List[int]]:
    # each cache sample is stored as '<timestamp> <size>'
    timestamp = []
    cache = []
    for line in lines:
        # split() ignores surrounding whitespace, so no explicit strip is needed
        t, c = line.split()
        timestamp.append(int(t))
        cache.append(int(c))
    return timestamp, cache


def average_usage(timestamp: List[int], cache: List[int]) -> int:
    # calculate the average function value:
    # average = area under curve / period
    assert len(timestamp) == len(cache)
    if len(timestamp) == 0:
        return -1
    elif len(timestamp) == 1:
        return cache[0]
    total_usage = 0
    previous_time = timestamp[0]
    previous_usage = cache[0]
    for i in range(1, len(timestamp)):
        delta_time = timestamp[i] - previous_time
        usage_sum = cache[i] + previous_usage
        # trapezoid rule
        total_usage += usage_sum / 2 * delta_time
        previous_time = timestamp[i]
        previous_usage = cache[i]

    return int(total_usage / (timestamp[-1] - timestamp[0]))
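
# Worked example (hypothetical numbers): for timestamps [0, 1, 2] and cache
# sizes [100, 200, 300], the trapezoid rule gives
#   (100 + 200) / 2 * 1 + (200 + 300) / 2 * 1 = 400,
# so average_usage returns int(400 / (2 - 0)) = 200.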


def average_dict_item(orig_dict: dict, update_dict: dict, key: str):
    raise NotImplementedError("This function is not yet implemented.")
    # This function is called elsewhere in this file, but at least in the rcc
    # experiments it was never triggered, so its intent is unknown and no
    # implementation is attempted. The version below is wrong, since
    # (a+b+c)/3 is not equal to ((a+b)/2 + c)/2 = a/4 + b/4 + c/2:
    # orig_dict[key] += update_dict[key]
    # orig_dict[key] //= 2
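
# A plausible fix, if the intent is a running mean across repeated runs
# (an assumption, not the original design): track a per-key sample count n
# and update incrementally, mean += (new_value - mean) / n, which yields the
# true arithmetic mean no matter how many runs are merged.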


def analyze_json_file(filepath: str, statistics_handle: dict):
    # decode the filename, e.g. 'run_gcc_perses_20230101_1.json' (hypothetical)
    base = os.path.basename(filepath)
    base = os.path.splitext(base)[0]
    buf, subject, reducer, timemark, iteration = base.split('_')
    reducer_t = f"{reducer}@{timemark}"

    # read the JSON file into a map
    with open(filepath) as f:
        json_dict = json.load(f)

    # trim redundant information
    if subject != json_dict['Subject']:
        raise Exception(f"Error: File name and content inconsistent: Subject. Please check {filepath}")
    json_dict.pop('Subject')
    if reducer != json_dict['Reducer']:
        raise Exception(f"Error: File name and content inconsistent: Reducer. Please check {filepath}")
    json_dict.pop('Reducer')

    # dictionary keys
    QUERY: Final = "Query"
    TIME: Final = "Time"
    TOKEN_R: Final = "Token_remaining"

    # append any new data to the statistics report
    if subject not in statistics_handle:
        statistics_handle[subject] = dict()
    if reducer_t not in statistics_handle[subject]:
        statistics_handle[subject][reducer_t] = json_dict
    else:
        # average with the existing entry
        sub_dict = statistics_handle[subject][reducer_t]
        average_dict_item(sub_dict, json_dict, QUERY)
        average_dict_item(sub_dict, json_dict, TIME)
        average_dict_item(sub_dict, json_dict, TOKEN_R)


def analyze_log_file(filepath: str, statistics: dict):
    # decode the filename (same scheme as in analyze_json_file)
    base = os.path.basename(filepath)
    base = os.path.splitext(base)[0]
    buf, subject, reducer, timemark, iteration = base.split('_')
    reducer_t = f"{reducer}@{timemark}"

    # read the log file into a list
    with open(filepath) as f:
        lines = f.readlines()

    # extract memory info from the log file
    timestamp, cache = extract_entries(lines)

    # dictionary keys
    PEAK: Final = "peak_cache_size"
    AVG: Final = "average_memory_usage"
    NUM_SAMPLES: Final = "number_of_samples"

    # calculate memory usage and store it in a map
    log_dict = dict()
    log_dict[PEAK] = max(cache)
    log_dict[AVG] = average_usage(timestamp, cache)
    log_dict[NUM_SAMPLES] = len(lines)

    # statistics[subject][reducer_t] exists because analyze_json_file is
    # always called on the paired JSON file first
    if PEAK not in statistics[subject][reducer_t]:
        statistics[subject][reducer_t].update(log_dict)
    else:
        sub_dict = statistics[subject][reducer_t]
        average_dict_item(sub_dict, log_dict, PEAK)
        average_dict_item(sub_dict, log_dict, AVG)
        average_dict_item(sub_dict, log_dict, NUM_SAMPLES)


def validate_existence(filepath: str):
    if not os.path.exists(filepath):
        raise Exception(f"Error: File not found in path: {filepath}")


def pair_files(folder_list: List[str]) -> Dict[str, str]:
    """Each result JSON file and its profiling log file should appear as a pair."""
    folder_list.sort()
    files_paired = dict()
    for folder in folder_list:
        for dirpath, dirnames, filenames in os.walk(folder):
            for filename in filenames:
                if filename.endswith('.json'):
                    json_file = os.path.join(dirpath, filename)
                    validate_existence(json_file)
                    log_file = json_file.replace(".json", ".log")
                    validate_existence(log_file)
                    files_paired[json_file] = log_file
    return files_paired
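
# Example pairing (hypothetical names): results/run_gcc_perses_20230101_1.json
# is matched with results/run_gcc_perses_20230101_1.log.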

def save_to_csv_file(statistics, csv_file):
    with open(csv_file, "w") as output_file:
        output_file.write("Subject,Entry_peak_cache,Average_cache,Entry_environment\n")

        for subject in statistics.keys():
            entries = statistics[subject]
            for key in entries.keys():
                entry_environment = entries[key]["Environment"]
                entry_peak_cache = entries[key]["peak_cache_size"]
                average_cache = entries[key]["average_memory_usage"]
                output_file.write(f"{subject},{entry_peak_cache},{average_cache},{entry_environment}\n")


def main():
    args = parse_arguments()
    statistics = dict()

    files_paired = pair_files(args.folders)

    # process the paired input files
    for json_file, log_file in files_paired.items():
        analyze_json_file(json_file, statistics)
        analyze_log_file(log_file, statistics)

    # kept for debugging: the aggregated statistics as pretty-printed JSON
    json_object = json.dumps(statistics, indent=4)
    # print(json_object)

    save_to_csv_file(statistics, args.output_csv)


if __name__ == "__main__":
    main()