Commit
Merge pull request #37 from JuniMay/Benchmarks
Benchmarks
JuniMay authored Aug 20, 2024
2 parents ff8869a + 32e366b commit 98fa6a9
Showing 109 changed files with 5,720 additions and 3 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -7,8 +7,12 @@

/tests/sysy/*
!/tests/sysy/sysy-runtime-lib
!/tests/sysy/sysy-runtime-lib-fix
!/tests/sysy/functional
!/tests/sysy/hidden-functional
!/tests/sysy/Benchmarks

/tests/testcases/*

/output*/

8 changes: 8 additions & 0 deletions README.md
@@ -102,3 +102,11 @@ target remote :1234
```

After that, just proceed with debugging.

## Benchmarks

The `tests/sysy/Benchmarks` directory contains a set of benchmarks used to evaluate the performance of the compiler. They were translated into the SysY language from the original C code and fall into three categories:

- [PolyBench/C 4.2](https://master.dl.sourceforge.net/project/polybench/polybench-c-4.2.tar.gz?viasf=1) -> **23** cases
- [Livermore loops](https://www.netlib.org/benchmark/livermorec) -> **6** cases
- [Mälardalen WCET Benchmarks](http://www.mrtc.mdh.se/projects/wcet/benchmarks.html) -> **2** cases
20 changes: 20 additions & 0 deletions scripts/README.md
@@ -14,3 +14,23 @@ gcc hexfloat.c -o hexfloat
## execute.py

A script to run the compiler on a batch of source codes.

## build_table.py

A script to build a timing table comparing the outputs of different compiler runs.

## compare_result.py

A script to compare two rows of the result CSV file.

## merge_cases.py

A script to merge the testcases (functional & performance) into one union directory.

## merge_file.py

A script to deduplicate merged testcases by their hash tuple (hash(xx.sy), hash(xx.in), hash(xx.out)).

## extract_time_record.py

A script to extract the total-time records from the log files.
41 changes: 41 additions & 0 deletions scripts/build_table.py
@@ -0,0 +1,41 @@
import os
import re
import pandas as pd

# Regular expression pattern to match time format
time_pattern = re.compile(r"(\d+)H-(\d+)M-(\d+)S-(\d+)us")

# Function to parse a log file and extract the time data
def parse_log_file(filename):
    data = {}
    with open(f"./tests/testcases/{filename}", 'r') as file:
        for line in file:
            if ':' in line:
                name, time_str = line.split(':')
                name = name.strip()
                time_match = time_pattern.search(time_str)
                if time_match:
                    hours, minutes, seconds, microseconds = map(int, time_match.groups())
                    total_time_in_seconds = hours * 3600 + minutes * 60 + seconds + microseconds / 1e6
                    data[name] = total_time_in_seconds
    return data

# List all log files in the directory
log_files = [f for f in os.listdir("./tests/testcases/") if f.endswith('.txt')]

# Initialize an empty DataFrame
df = pd.DataFrame()

# Process each log file and add the data to the DataFrame
for log_file in log_files:
    log_data = parse_log_file(log_file)
    df[log_file] = pd.Series(log_data)

# Replace missing values with a hyphen
df.fillna("-", inplace=True)
df.index.name = "Log Item"

# Save the DataFrame as a CSV file
df.to_csv("./tests/testcases/table.csv")

print("Log files combined into 'table.csv'")
44 changes: 44 additions & 0 deletions scripts/compare_result.py
@@ -0,0 +1,44 @@
import csv

def compare_rows(csv_file, row1, row2):
    with open(csv_file, 'r') as file:
        reader = csv.reader(file)
        rows = list(reader)
    if row1 < 0 or row1 >= len(rows) or row2 < 0 or row2 >= len(rows):
        print("Invalid row numbers")
        return
    headers = rows[0]
    data1 = rows[row1]
    data2 = rows[row2]
    print(f"Row1 commit message: {data1[1]}\nRow2 commit message: {data2[1]}")

    for i in range(2, len(headers)):  # skip the first 2 columns
        header = headers[i]
        value1_str = data1[i].replace('(AC)', '').strip()
        value2_str = data2[i].replace('(AC)', '').strip()
        try:
            value1 = float(value1_str)
            value2 = float(value2_str)
            diff = value2 - value1
            diff = diff / value1 * 100
            print(f"Difference in {header} (row2 - row1)/row1: {diff}%")
        except ValueError:
            print(f"Error converting values in {header}: '{data1[i]}' and '{data2[i]}'")

def print_row_numbers_and_second_column(csv_file):
    with open(csv_file, 'r') as file:
        reader = csv.reader(file)
        for index, row in enumerate(reader):
            if len(row) > 1:  # make sure the second column exists
                print(f"Row {index}: {row[1]}")
            else:
                print(f"Row {index}: No second column")

csv_file = './tests/testcases/result.csv'

print_row_numbers_and_second_column(csv_file)

row1 = int(input("Enter row1 number: "))
row2 = int(input("Enter row2 number: "))

compare_rows(csv_file, row1, row2)
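Note: the layout of `result.csv` is not part of this commit; `compare_rows` assumes column 1 holds a commit message and columns 2 onward hold per-case timings, optionally tagged with `(AC)`. A hypothetical example consistent with that assumption:

```python
# Hypothetical ./tests/testcases/result.csv contents:
#
#   date,commit,case_a,case_b
#   2024-08-01,baseline,1.20(AC),3.40(AC)
#   2024-08-10,enable gvn,1.05(AC),3.10(AC)
#
# Comparing row 1 against row 2 would then report, per case, roughly:
value1, value2 = 1.20, 1.05
print(f"Difference in case_a (row2 - row1)/row1: {(value2 - value1) / value1 * 100}%")
# ~ -12.5% for case_a (and ~ -8.8% for case_b), modulo floating-point rounding
```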
50 changes: 50 additions & 0 deletions scripts/extract_time_record.py
@@ -0,0 +1,50 @@
import os
import re

def parse_time(time_str):
    pattern = r" (\d+)H-(\d+)M-(\d+)S-(\d+)us"
    match = re.match(pattern, time_str)
    if match:
        hours = int(match.group(1))
        minutes = int(match.group(2))
        seconds = int(match.group(3))
        microseconds = int(match.group(4))
        total_seconds = hours * 3600 + minutes * 60 + seconds + microseconds / 1_000_000
        return total_seconds
    else:
        print(f"Error parsing time: {time_str}")  # debug output
        raise ValueError("Invalid time format")

def extract_total_time(log_directory, output_file, custom_message):
    log_files = sorted([f for f in os.listdir(log_directory) if f.endswith('.log')])
    overall_total_time = 0

    with open(output_file, 'w') as output:
        output.write(custom_message + '\n\n')

        for log_file in log_files:
            log_path = os.path.join(log_directory, log_file)
            log_file = log_file.replace('.log', '')
            last_total_line = None

            with open(log_path, 'r') as file:
                for line in file:
                    if line.startswith('TOTAL'):
                        last_total_line = line.strip()

            last_total_line = last_total_line.replace('TOTAL:', '') if last_total_line else None
            if last_total_line:
                print(f"{log_file}: {last_total_line}")
                output.write(f"{log_file}: {last_total_line}\n")
                try:
                    overall_total_time += parse_time(last_total_line)
                except ValueError as e:
                    print(f"Skipping {log_file} due to parsing error: {e}")

        output.write(f"\nOverall Total Time: {overall_total_time:.6f} seconds\n")

log_directory = '/Users/fengsicheng/Desktop/orzcc/output'
output_file = './tests/testcases/time_record_newest.txt'
custom_message = "Total time for each log file:"

extract_total_time(log_directory, output_file, custom_message)
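Note: the leading space in the regex used by `parse_time` is deliberate: it matches the space left behind once `'TOTAL:'` is stripped from a log line. A minimal sketch with a made-up total line:

```python
import re

# A hypothetical final "TOTAL" line from one of the .log files.
last_total_line = "TOTAL: 0H-0M-1S-234567us"
time_str = last_total_line.replace("TOTAL:", "")  # " 0H-0M-1S-234567us"

# The pattern starts with a space, so re.match still succeeds on the stripped string.
h, m, s, us = map(int, re.match(r" (\d+)H-(\d+)M-(\d+)S-(\d+)us", time_str).groups())
print(h * 3600 + m * 60 + s + us / 1_000_000)  # 1.234567
```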
56 changes: 56 additions & 0 deletions scripts/merge_cases.py
@@ -0,0 +1,56 @@
import os
import shutil

# merge all performance cases (including the final performance cases) into one directory
# source_dirs = [
#     './tests/testcases/2023/performance',
#     './tests/testcases/2022/performance',
#     './tests/testcases/2021/performance_test2021-private',
#     './tests/testcases/2021/performance_test2021-public',
#     './tests/testcases/2021/2021初赛所有用例/performance',
#     './tests/testcases/2020/section1/performance_test',
#     './tests/testcases/2020/section2/performance_test',
#     './tests/testcases/2023/final_performance',
#     './tests/testcases/2022/final_performance',
# ]
# output_dir = './tests/testcases/union/performance'

# merge all functional cases into one directory
source_dirs = [
    './tests/testcases/2023/functional',
    './tests/testcases/2023/hidden_functional',
    './tests/testcases/2022/functional',
    './tests/testcases/2022/hidden_functional',
    './tests/testcases/2021/function_test2020',
    './tests/testcases/2021/function_test2021',
    './tests/testcases/2021/2021初赛所有用例/functional',
    './tests/testcases/2021/2021初赛所有用例/h_functional',
    './tests/testcases/2020/section1/functional_test',
]

output_dir = './tests/testcases/union/function'


# Create the output directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# A set of already-handled file names, to avoid copying duplicates
handled_files = set()

# Copy the files from a source directory into the output directory
def copy_files(source_dir):
    for file_name in os.listdir(source_dir):
        # Skip files that have already been handled
        if file_name not in handled_files:
            # Not handled yet: record it and copy it to the target directory
            handled_files.add(file_name)
            source_file = os.path.join(source_dir, file_name)
            target_file = os.path.join(output_dir, file_name)
            # Copy the file
            shutil.copy(source_file, target_file)

# Process each source directory in turn
for dir_path in source_dirs:
    copy_files(dir_path)

print("All files have been successfully copied to", output_dir)
55 changes: 55 additions & 0 deletions scripts/merge_file.py
@@ -0,0 +1,55 @@
import os
import hashlib

def hash_file(file_path):
    """Calculate the hash value of a file."""
    hasher = hashlib.sha256()
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            buf = f.read()
            hasher.update(buf)
    else:
        # If file does not exist, return hash of empty string
        hasher.update(b'')
    return hasher.hexdigest()

def remove_duplicate_files(directory):
    # Get a list of all files with the .sy extension in the directory
    files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.sy') and os.path.isfile(os.path.join(directory, f))]

    hash_dict = {}

    for file_path in files:
        # Hash the .sy file
        sy_hash = hash_file(file_path)

        # Hash the corresponding .in and .out files
        file_name = os.path.splitext(file_path)[0]
        in_file = file_name + '.in'
        out_file = file_name + '.out'

        in_hash = hash_file(in_file)
        out_hash = hash_file(out_file)

        # Combine the three hashes into a tuple
        combined_hash = (sy_hash, in_hash, out_hash)

        if combined_hash in hash_dict:
            # If the combined hash value exists, remove the duplicate files
            os.remove(file_path)
            print(f"Removed duplicate file: {file_path}")

            if os.path.exists(out_file):
                os.remove(out_file)
                print(f"Removed duplicate file: {out_file}")

            if os.path.exists(in_file):
                os.remove(in_file)
                print(f"Removed duplicate file: {in_file}")
        else:
            hash_dict[combined_hash] = file_path

# Example usage:
# directory = '/Users/fengsicheng/Desktop/orzcc/tests/testcases/merged_union/function'
directory = '/Users/fengsicheng/Desktop/orzcc/tests/testcases/merged_union/performance'
remove_duplicate_files(directory)
8 changes: 5 additions & 3 deletions scripts/test_oracle.py
@@ -46,8 +46,10 @@ def test_one(test_case, timeout, opt_level, output_dir, executable_path, runtime
    log_path = os.path.join(output_dir, f"{basename}.log")
    log_file = open(log_path, "w")

    test_case_clang = test_case.replace(".sy", ".fsc")

    command = (
        f"clang -w -xc {test_case} "
        f"clang -w -xc -Wno-implicit-function-declaration {test_case_clang} "
        f"./tests/sysy/sysy-runtime-lib-fix/sylib.c "
        f"-o {std_exec_path}"
    )
@@ -124,10 +126,10 @@ def test_one(test_case, timeout, opt_level, output_dir, executable_path, runtime
        return False

    command = (
        (f"qemu-riscv64 -L /usr/riscv64-linux-gnu {exec_path}" f" >{out_path}")
        (f"qemu-riscv64 -cpu rv64,zba=true,zbb=true -L /usr/riscv64-linux-gnu {exec_path}" f" >{out_path}")
        if in_path is None
        else (
            f"qemu-riscv64 -L /usr/riscv64-linux-gnu {exec_path}"
            f"qemu-riscv64 -cpu rv64,zba=true,zbb=true -L /usr/riscv64-linux-gnu {exec_path}"
            f" <{in_path} >{out_path}"
        )
    )