From c52b14b1d5b1b74d886f08d9914e7f43437f609d Mon Sep 17 00:00:00 2001 From: Silen Naihin Date: Mon, 10 Jul 2023 21:36:25 -0400 Subject: [PATCH] add reports, consolidate, update benchmark files --- benchmarks.py => agbenchmark/benchmarks.py | 13 +++++++------ agbenchmark/config.json | 5 +++++ .../regression_tests.json | 3 ++- agbenchmark/reports/1.json | 18 ++++++++++++++++++ config.json | 6 ------ generated/.gitkeep | 3 --- 6 files changed, 32 insertions(+), 16 deletions(-) rename benchmarks.py => agbenchmark/benchmarks.py (69%) create mode 100644 agbenchmark/config.json rename regression_tests.json => agbenchmark/regression_tests.json (51%) create mode 100644 agbenchmark/reports/1.json delete mode 100644 config.json delete mode 100644 generated/.gitkeep diff --git a/benchmarks.py b/agbenchmark/benchmarks.py similarity index 69% rename from benchmarks.py rename to agbenchmark/benchmarks.py index cd1118a21..a4e6f4c93 100644 --- a/benchmarks.py +++ b/agbenchmark/benchmarks.py @@ -7,19 +7,20 @@ def run_specific_agent(task: str) -> Tuple[str, int]: # Construct the command - command = ['python', 'main_no_modal.py', task] + command = ["python", "main_no_modal.py", task] subprocess.run(command, text=True) + def execute_generated_files(): # Navigate to generated directory - os.chdir('generated') + os.chdir("generated") # Iterate over every .txt file in the directory - for file_name in glob.glob('*.txt'): - with open(file_name, 'r') as file: + for file_name in glob.glob("../*.txt"): + with open(file_name, "r") as file: python_code = file.read() - python_code = python_code.replace('```python', '') - python_code = python_code.replace('```', '') + python_code = python_code.replace("```python", "") + python_code = python_code.replace("```", "") # Execute the code in the .txt file exec(python_code) diff --git a/agbenchmark/config.json b/agbenchmark/config.json new file mode 100644 index 000000000..bc89d5ef4 --- /dev/null +++ b/agbenchmark/config.json @@ -0,0 +1,5 @@ +{ + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 +} diff --git a/regression_tests.json b/agbenchmark/regression_tests.json similarity index 51% rename from regression_tests.json rename to agbenchmark/regression_tests.json index e3633a2af..c6434ffc0 100644 --- a/regression_tests.json +++ b/agbenchmark/regression_tests.json @@ -2,6 +2,7 @@ "TestWriteFile": { "difficulty": "basic", "dependencies": [], - "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py" + "test": "agbenchmark/challenges/interface/write_file", + "success": true } } \ No newline at end of file diff --git a/agbenchmark/reports/1.json b/agbenchmark/reports/1.json new file mode 100644 index 000000000..48a631c5b --- /dev/null +++ b/agbenchmark/reports/1.json @@ -0,0 +1,18 @@ +{ + "command": "agbenchmark start --test TestWriteFile --mock", + "completion_time": "2023-07-10-21:19", + "time_elapsed": "8.34 seconds", + "tests": { + "TestWriteFile": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/challenges/interface/write_file", + "success": true + } + }, + "config": { + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 + } +} \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index d0a72bd85..000000000 --- a/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "workspace": "generated", - "entry_path": "benchmarks.py", - "home_path": "", - "cutoff": 60 -} diff --git a/generated/.gitkeep b/generated/.gitkeep deleted file mode 100644 index 4eb9b6ffa..000000000 --- a/generated/.gitkeep +++ /dev/null @@ -1,3 +0,0 @@ -# generated folder - -by default, `main.py` will generate the app in this folder (you can customize with the `--directory=newFolderHere` flag). \ No newline at end of file