diff --git a/benchmarks.py b/agbenchmark/benchmarks.py similarity index 69% rename from benchmarks.py rename to agbenchmark/benchmarks.py index cd1118a21..a4e6f4c93 100644 --- a/benchmarks.py +++ b/agbenchmark/benchmarks.py @@ -7,19 +7,20 @@ def run_specific_agent(task: str) -> Tuple[str, int]: # Construct the command - command = ['python', 'main_no_modal.py', task] + command = ["python", "main_no_modal.py", task] subprocess.run(command, text=True) + def execute_generated_files(): # Navigate to generated directory - os.chdir('generated') + os.chdir("generated") # Iterate over every .txt file in the directory - for file_name in glob.glob('*.txt'): - with open(file_name, 'r') as file: + for file_name in glob.glob("../*.txt"): + with open(file_name, "r") as file: python_code = file.read() - python_code = python_code.replace('```python', '') - python_code = python_code.replace('```', '') + python_code = python_code.replace("```python", "") + python_code = python_code.replace("```", "") # Execute the code in the .txt file exec(python_code) diff --git a/agbenchmark/config.json b/agbenchmark/config.json new file mode 100644 index 000000000..bc89d5ef4 --- /dev/null +++ b/agbenchmark/config.json @@ -0,0 +1,5 @@ +{ + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 +} diff --git a/regression_tests.json b/agbenchmark/regression_tests.json similarity index 51% rename from regression_tests.json rename to agbenchmark/regression_tests.json index e3633a2af..c6434ffc0 100644 --- a/regression_tests.json +++ b/agbenchmark/regression_tests.json @@ -2,6 +2,7 @@ "TestWriteFile": { "difficulty": "basic", "dependencies": [], - "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py" + "test": "agbenchmark/challenges/interface/write_file", + "success": true } } \ No newline at end of file diff --git a/agbenchmark/reports/1.json b/agbenchmark/reports/1.json new file mode 100644 index 000000000..48a631c5b --- /dev/null +++ b/agbenchmark/reports/1.json @@ -0,0 +1,18 @@ +{ + "command": "agbenchmark start --test TestWriteFile --mock", + "completion_time": "2023-07-10-21:19", + "time_elapsed": "8.34 seconds", + "tests": { + "TestWriteFile": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/challenges/interface/write_file", + "success": true + } + }, + "config": { + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 + } +} \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index d0a72bd85..000000000 --- a/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "workspace": "generated", - "entry_path": "benchmarks.py", - "home_path": "", - "cutoff": 60 -} diff --git a/generated/.gitkeep b/generated/.gitkeep deleted file mode 100644 index 4eb9b6ffa..000000000 --- a/generated/.gitkeep +++ /dev/null @@ -1,3 +0,0 @@ -# generated folder - -by default, `main.py` will generate the app in this folder (you can customize with the `--directory=newFolderHere` flag). \ No newline at end of file