diff --git a/.feature/files.yml b/.feature/files.yml new file mode 100644 index 0000000000..78fa862183 --- /dev/null +++ b/.feature/files.yml @@ -0,0 +1,253 @@ +# Uncomment any files you would like to use for this feature +# Note that (./) is a special key which represents files at the root of the parent directory + +.feature: + #- files.yml +.github: + ISSUE_TEMPLATE: + #- bug-report.md + #- documentation-clarification.md + #- feature-request.md + PULL_REQUEST_TEMPLATE: + #- PULL_REQUEST_TEMPLATE.md + workflows: + #- automation.yml + #- ci.yaml + #- pre-commit.yaml + #- release.yaml + (./): + #- CODEOWNERS + #- CODE_OF_CONDUCT.md + #- CONTRIBUTING.md + #- FUNDING.yml +docker: + #- Dockerfile + #- README.md + #- entrypoint.sh +docs: + examples: + open_llms: + #- README.md + #- langchain_interface.py + #- openai_api_interface.py + (./): + #- Makefile + #- api_reference.rst + #- code_conduct_link.rst + #- conf.py + #- contributing_link.rst + #- create_api_rst.py + #- disclaimer_link.rst + #- docs_building.md + #- index.rst + #- installation.rst + #- introduction.md + #- make.bat + #- open_models.md + #- quickstart.rst + #- roadmap_link.rst + #- terms_link.rst + #- tracing_debugging.md + #- windows_readme_link.rst +gpt_engineer: + applications: + cli: + #- __init__.py + #- cli_agent.py + #- collect.py + #- file_selector.py + #- learning.py + #- main.py + feature_cli: + agents: + - __init__.py + - agent_steps.py + - chat_agent.py + - feature_agent.py + prompts: + - __init__.py + - fuzzy_file_parser + (./): + - __init__.py + - domain.py + - feature.py + - file_selection.py + - files.py + - generation_tools.py + - main.py + - repository.py + (./): + #- __init__.py + benchmark: + benchmarks: + apps: + #- load.py + #- problem.py + #- problems.py + gptme: + #- load.py + mbpp: + #- load.py + #- problem.py + #- problems.py + (./): + #- load.py + (./): + #- __init__.py + #- __main__.py + #- bench_config.py + #- default_bench_config.toml + #- run.py + #- types.py + core: + 
default: + #- __init__.py + #- constants.py + #- disk_execution_env.py + #- disk_memory.py + #- file_store.py + #- paths.py + #- simple_agent.py + #- steps.py + (./): + #- __init__.py + #- ai.py + #- base_agent.py + #- base_execution_env.py + #- base_memory.py + #- chat_to_files.py + #- diff.py + #- files_dict.py + #- git.py + #- linting.py + #- preprompts_holder.py + #- project_config.py + #- prompt.py + #- token_usage.py + #- version_manager.py + preprompts: + #- clarify + #- entrypoint + #- file_format + #- file_format_diff + #- file_format_fix + #- generate + #- improve + #- philosophy + #- roadmap + tools: + #- __init__.py + #- custom_steps.py + #- supported_languages.py + (./): + #- __init__.py +projects: + example: + #- prompt + example-improve: + #- README.md + #- controller.py + #- main.py + #- model.py + #- prompt + #- requirements.txt + #- run.sh + #- view.py + example-vision: + images: + #- ux_diagram.png + (./): + #- navigation.html + #- prompt +scripts: + #- clean_benchmarks.py + #- legacy_benchmark.py + #- print_chat.py + #- test_api.py +tests: + applications: + cli: + #- __init__.py + #- test_cli_agent.py + #- test_collect.py + #- test_collection_consent.py + #- test_learning.py + #- test_main.py + feature_cli: + #- __init__.py + #- test_file_selection.py + (./): + #- __init__.py + benchmark: + #- test_BenchConfig.py + core: + default: + #- __init__.py + #- test_disk_execution_env.py + #- test_disk_file_repository.py + #- test_simple_agent.py + #- test_steps.py + improve_function_test_cases: + #- apps_benchmark_6_chat + #- apps_benchmark_6_code + #- apps_benchmark_6_v2_chat + #- apps_benchmark_6_v2_code + #- controller_chat + #- controller_code + #- corrected_diff_from_missing_lines + #- create_two_new_files_chat + #- create_two_new_files_code + #- simple_calculator_chat + #- simple_calculator_code + #- task_master_chat + #- task_master_code + #- temperature_converter_chat + #- temperature_converter_code + #- theo_case_chat + #- theo_case_code + #- 
vgvishesh_example_2_chat + #- vgvishesh_example_2_code + #- vgvishesh_example_chat + #- vgvishesh_example_code + #- wheaties_example_chat + #- wheaties_example_code + (./): + #- __init__.py + #- test_ai.py + #- test_chat_to_files.py + #- test_git.py + #- test_salvage_correct_hunks.py + #- test_token_usage.py + test_data: + #- mona_lisa.jpg + tools: + #- example_snake_files.py + (./): + #- __init__.py + #- ai_cache.json + #- mock_ai.py + #- test_install.py + #- test_project_config.py +(./): +#- .dockerignore +#- .env.template +#- .gitignore +#- .pre-commit-config.yaml +#- .readthedocs.yaml +#- Acknowledgements.md +#- DISCLAIMER.md +#- GOVERNANCE.md +#- LICENSE +#- MANIFEST.in +#- Makefile +#- README.md +#- ROADMAP.md +#- TERMS_OF_USE.md +#- WINDOWS_README.md +#- citation.cff +#- docker-compose.yml +#- poetry.lock +#- pyproject.toml +#- quicktest.py +#- sweep.yaml +#- tox.ini diff --git a/.gitignore b/.gitignore index c0c793b88e..81c4a87511 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,11 @@ webapp/.next/ gpt_engineer/benchmark/benchmarks/apps/dataset gpt_engineer/benchmark/benchmarks/mbpp/dataset +prompt + +.feature +.task + gpt_engineer/benchmark/minimal_bench_config.toml test.json diff --git a/gpt_engineer/applications/cli/file_selector.py b/gpt_engineer/applications/cli/file_selector.py index a80608620c..7e7a31825d 100644 --- a/gpt_engineer/applications/cli/file_selector.py +++ b/gpt_engineer/applications/cli/file_selector.py @@ -19,6 +19,7 @@ import fnmatch import os +import platform import subprocess from pathlib import Path @@ -218,32 +219,23 @@ def open_with_default_editor(self, file_path: Union[str, Path]): The path to the file to be opened in the text editor. 
""" - editors = [ - "gedit", - "notepad", - "nvim", - "write", - "nano", - "vim", - "emacs", - ] # Putting the beginner-friendly text editor forward chosen_editor = os.environ.get("EDITOR") # Try the preferred editor first, then fallback to common editors if chosen_editor: try: - subprocess.run([chosen_editor, file_path]) + subprocess.run([chosen_editor, str(file_path)], check=True) return except Exception: pass - for editor in editors: - try: - subprocess.run([editor, file_path]) - return - except Exception: - continue - print("No suitable text editor found. Please edit the file manually.") + # Platform-specific methods to open the file + if platform.system() == "Windows": + os.startfile(file_path) + elif platform.system() == "Darwin": + subprocess.run(["open", file_path]) + else: # Linux and other Unix-like systems + subprocess.run(["xdg-open", file_path]) def is_utf8(self, file_path: Union[str, Path]) -> bool: """ diff --git a/gpt_engineer/applications/cli/main.py b/gpt_engineer/applications/cli/main.py index 95158bc000..3999c279bf 100644 --- a/gpt_engineer/applications/cli/main.py +++ b/gpt_engineer/applications/cli/main.py @@ -455,8 +455,6 @@ def main( files = FileStore(project_path) if not no_execution: if improve_mode: - files_dict_before, is_linting = FileSelector(project_path).ask_for_files() - # lint the code if is_linting: files_dict_before = files.linting(files_dict_before) diff --git a/gpt_engineer/applications/feature_cli/__init__.py b/gpt_engineer/applications/feature_cli/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/gpt_engineer/applications/feature_cli/agents/__init__.py b/gpt_engineer/applications/feature_cli/agents/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/gpt_engineer/applications/feature_cli/agents/agent_steps.py b/gpt_engineer/applications/feature_cli/agents/agent_steps.py new file mode 100644 index 0000000000..3c9067ba4d --- /dev/null +++ 
def print_feature_state(feature, file_selector):
    """
    Print a summary of the active feature to the terminal.

    Shows the feature description, the selected files, the completed tasks
    and the active task. Prints "No active feature." when the feature has no
    description yet.

    Parameters
    ----------
    feature : Feature
        Supplies the description, the progress object and the active task.
    file_selector : FileSelector
        Supplies the pretty-printed file selection.
    """
    # Function-scope import keeps the module's import block untouched.
    from html import escape

    def as_markup_text(value):
        # prompt_toolkit's HTML() parses its argument as XML, so user-written
        # text containing "<" or "&" must be escaped or parsing fails.
        return escape(str(value), quote=False)

    if not feature.has_description():
        print_formatted_text(HTML("No active feature."))
        return

    feature_description = feature.get_description()
    file_string = file_selector.get_pretty_selected_from_yaml()

    # get_progress() returns None when no progress file has been written yet.
    progress = feature.get_progress() or {}
    completed_tasks = progress.get("done") or []

    completed_tasks_string = "None"
    if completed_tasks:
        completed_tasks_string = "\n".join(f"• {task}" for task in completed_tasks)

    active_task_string = "None"
    if feature.has_task():
        active_task_string = feature.get_task()

    output = f"""
---

Active Feature

{as_markup_text(feature_description)}

File Selection

{as_markup_text(file_string)}

Completed Tasks

{as_markup_text(completed_tasks_string)}

Active Task

{as_markup_text(active_task_string)}

---
"""

    print_formatted_text(HTML(output))
def update_feature(feature: Feature, file_selector: FileSelector):
    """
    Show the "update feature" menu and run the action the user picks.

    Parameters
    ----------
    feature : Feature
        The feature whose description may be edited.
    file_selector : FileSelector
        The file selection which may be edited.
    """
    menu_completer = WordCompleter(["1", "2", "3", "x"], ignore_case=True)
    menu_session = InputSession()

    menu_text = """
Would you like to:

1 - Edit Feature Description
2 - Edit File Selection
3 - Finish/Deactivate Feature

x - Exit

"""

    choice = menu_session.prompt(HTML(menu_text), completer=menu_completer).lower()

    print()

    # The choices are mutually exclusive; any other input does nothing.
    if choice == "1":
        update_feature_description(feature)
    elif choice == "2":
        update_user_file_selection(file_selector)
    elif choice == "3":
        print("Sorry! Not implemented yet.")
    elif choice == "x":
        print("Exiting...")
def get_git_context(repository):
    """
    Gather the git context of the repository while showing a spinner.

    Parameters
    ----------
    repository : Repository
        The repository to query via ``repository.get_git_context()``.

    Returns
    -------
    The object returned by ``repository.get_git_context()``.
    """
    with yaspin(text="Gathering git context...") as spinner:
        git_context = repository.get_git_context()
        spinner.ok("✔")

    # Previously nothing was returned, so every caller received None.
    return git_context
def check_existing_task(feature, file_selector):
    """
    Ask the user what to do with a task that is already present.

    Returns
    -------
    bool
        True when the caller should carry on (implement the task, or the task
        was discarded); False when the task was marked complete, the user
        chose to exit, or the input was not recognised.
    """
    menu_completer = WordCompleter(["1", "2", "3", "x"], ignore_case=True)
    menu_session = InputSession()

    menu_text = """You have an existing task present

Would you like to:

1 - Implement task
2 - Mark task as complete
3 - Discard task and continue

x - Exit

"""

    choice = menu_session.prompt(HTML(menu_text), completer=menu_completer).lower()

    print()

    should_continue = False

    if choice == "1":
        should_continue = True
    elif choice == "2":
        complete_task(feature, file_selector)
    elif choice == "3":
        # An empty task file counts as "no task".
        feature.set_task("")
        should_continue = True
    elif choice == "x":
        print("Exiting...")

    return should_continue
def confirm_feature_context_and_task_with_user(
    feature: Feature, file_selector: FileSelector
):
    """
    Print the feature, file selection and task, then ask for confirmation.

    Returns
    -------
    bool
        True when the user answers "y" or "yes" (case-insensitive),
        otherwise False.
    """
    # Refresh the selection file before reading it back for display.
    file_selector.update_yaml_from_tracked_files()
    pretty_files = file_selector.get_pretty_selected_from_yaml()

    description = feature.get_description()
    current_task = feature.get_task()

    print(f"Feature: {description}\n\n")
    print(f"Files: \n\nrepo\n{pretty_files}\n\n")
    print(f"Task: {current_task}\n\n")

    answer = cli_input("Do you want to implement this task? y/n: ").lower()
    return answer in ["y", "yes"]
def review_changes(
    project_path,
    feature: Feature,
    repository: Repository,
    ai: AI,
    file_selector: FileSelector,
):
    """
    Ask the user how to proceed after code has been generated for a task.

    Parameters
    ----------
    project_path
        Path of the project, forwarded to ``generate_code_for_task``.
    feature : Feature
        The feature whose task was just implemented.
    repository : Repository
        Used to stage or undo changes and to gather git context for retries.
    ai : AI
        Forwarded to ``generate_code_for_task`` on retry.
    file_selector : FileSelector
        Forwarded to ``complete_task`` and ``generate_code_for_task``.
    """

    completer = WordCompleter(["1", "2", "3", "4", "5", "x"], ignore_case=True)
    session = InputSession()

    result = session.prompt(
        HTML(
            """Code generation for task complete

Important: Please review and edit the unstaged changes with your IDE of choice...

Would you like to:

1 - Complete task and stage changes (Recommended)
2 - Complete task and don't stage changes

3 - Undo changes and Retry task
4 - Leave changes and Retry task

5 - Discard task and continue

x - Exit

"""
        ),
        completer=completer,
    ).lower()

    print()

    if result == "1":
        repository.stage_all_changes()
        complete_task(feature, file_selector)
    elif result == "2":
        complete_task(feature, file_selector)
    elif result in ("3", "4"):
        print("Rerunning generation...")
        # Only option 3 discards the generated changes; option 4 keeps them.
        if result == "3":
            repository.undo_unstaged_changes()
        # Gather the context after any undo so it reflects the working tree
        # the retry will actually run against.
        git_context = repository.get_git_context()
        # Argument order follows generate_code_for_task's signature:
        # (project_path, feature, git_context, ai, file_selector).
        generate_code_for_task(project_path, feature, git_context, ai, file_selector)
    elif result == "5":
        feature.clear_task()
    elif result == "x":
        print("exiting...")
FileSelector +from gpt_engineer.applications.feature_cli.agents.agent_steps import ( + update_user_file_selection, + confirm_chat_feature, + get_git_context, +) +from gpt_engineer.applications.feature_cli.generation_tools import ( + build_files_context_string, +) + + +class ChatAgent: + + def __init__( + self, + ai: AI, + project_path: str, + feature: Feature, + repository: Repository, + file_selector: FileSelector, + ): + self.ai = ai + self.project_path = project_path + self.feature = feature + self.repository = repository + self.file_selector = file_selector + + def start(self): + + update_user_file_selection(self.file_selector) + + selected_files = self.file_selector.get_from_yaml().included_files + + files = Files(self.project_path, selected_files) + + context_string = f"Files from code repository:\n\n{files.to_chat()}" + + if self.feature.has_description(): + with_feature = confirm_chat_feature() + + if with_feature: + git_context = get_git_context(self.repository) + context_string = build_files_context_string( + self.feature, git_context, files + ) + + system = f"""You are the chat function of an AI software engineering tool called gpt engineer. + +The tool takes a feature descriptioin, progress on the feature, git context, and repository files relevent to the feature +and based on that it suggests new tasks to complete in order to progress the feature, and it implements those tasks for the user. + +You are not that tool, you are the chat function of that tool. You are here to help the user discuss their code and their feature and understand discuss any part of it with you - a software engineering expert. + +Always provide advice as to best software engineering practices. 
class FeatureAgent:
    """
    A cli agent which implements a feature as a set of incremental tasks
    """

    def __init__(
        self,
        ai: AI,
        project_path: str,
        feature: Feature,
        repository: Repository,
        file_selector: FileSelector,
    ):
        """Store the collaborators used by the feature workflow."""
        self.ai = ai
        self.project_path = project_path
        self.feature = feature
        self.repository = repository
        self.file_selector = file_selector

    def initialize_feature(self):
        """Create a new feature and let the user pick the files for it."""
        initialize_new_feature(self.ai, self.feature, self.repository)

        update_user_file_selection(self.file_selector)

        print("\nFeature Initialized. Run gptf task to begin working on it.")

    def update_feature(self):
        """Print the feature state, then initialise or update the feature."""
        print_feature_state(self.feature, self.file_selector)

        if not self.feature.has_description():
            self.initialize_feature()
        else:
            # The update step edits the file selection, so it needs the file
            # selector (the repository was passed here by mistake).
            update_feature(self.feature, self.file_selector)

    def run_task(self):
        """
        Loop over tasks: pick or create a task, generate code, then review.

        Returns when there is no feature, or when the user exits from one of
        the interactive steps.
        """
        print_feature_state(self.feature, self.file_selector)

        if not self.feature.has_description():
            print(
                """Run gptf to initialize new feature.

or

Run gptf task --no-feature to implement task without a feature"""
            )
            return

        if self.feature.has_task():
            if not check_existing_task(self.feature, self.file_selector):
                return

        while True:
            git_context = get_git_context(self.repository)

            if not self.feature.has_task():
                initiate_new_task(
                    self.ai, self.feature, git_context, self.file_selector
                )

                # The user exited the menu or left the placeholder text, so
                # there is nothing to implement.
                if not self.feature.has_task():
                    return

            if not check_for_unstaged_changes(self.repository):
                return

            generate_code_for_task(
                self.project_path,
                self.feature,
                git_context,
                self.ai,
                self.file_selector,
            )

            review_changes(
                self.project_path,
                self.feature,
                self.repository,
                self.ai,
                self.file_selector,
            )
class TaskAgent:
    """
    A cli agent which implements a one off task
    """

    def __init__(
        self,
        ai: AI,
        project_path: str,
        task: Task,
        repository: Repository,
        file_selector: FileSelector,
    ):
        """Store the collaborators used by the one-off task workflow."""
        self.ai = ai
        self.project_path = project_path
        self.task = task
        self.repository = repository
        self.file_selector = file_selector

    def _confirm__task_with_user(self):
        """
        Print the selected files and the task, then ask for confirmation.

        Returns True when the user answers "y" or "yes" (case-insensitive).
        """
        file_selector = self.file_selector
        file_selector.update_yaml_from_tracked_files()
        file_string = file_selector.get_pretty_selected_from_yaml()

        task = self.task.get_task()

        print(f"Files: \n\nrepo\n{file_string}\n\n")
        print(f"Task: {task}\n\n")

        answer = cli_input("Do you want to implement this task? y/n: ").lower()
        return answer in ["y", "yes"]

    def _run_improve_mode(self):
        """Run the improve step for the task and write the result to disk."""
        memory = DiskMemory(memory_path(self.project_path))
        preprompts_holder = PrepromptsHolder(PREPROMPTS_PATH)

        prompt = Prompt(self.task.get_task())

        selected_files = self.file_selector.get_from_yaml().included_files

        files = Files(self.project_path, selected_files)

        def improve():
            return improve_fn(self.ai, prompt, files, memory, preprompts_holder)

        print("\n---- begining code generation ----\n")
        updated_files_dictionary = handle_improve_mode(improve, memory)
        print("\n---- ending code generation ----\n")

        files.write_to_disk(updated_files_dictionary)

    def run(self):
        """
        Let the user edit the task and file selection, then implement it.

        Returns without generating code when the user chooses to exit at the
        unstaged-changes prompt.
        """
        self.task.open_task_in_editor()
        input("Please edit the task file and then press Enter to continue...")

        update_user_file_selection(self.file_selector)

        while not self._confirm__task_with_user():
            adjust_prompt_files()

        # check_for_unstaged_changes returns False when the user picks
        # "x - Exit"; respect that instead of generating code anyway.
        if not check_for_unstaged_changes(self.repository):
            return

        self._run_improve_mode()
class Feature(DiskMemory):
    """
    Represents a ticket which will be developed incrementally.

    Consists of a feature (overall description of the change),
    a task (current incremental work item),
    and progress (history of incremental work completed). Each is stored as
    a file inside the project's ``.feature`` directory.
    """

    def __init__(self, project_path: Union[str, Path], repository: Repository):
        """
        Parameters
        ----------
        project_path : Union[str, Path]
            Root of the project; the ``.feature`` directory is created in it.
        repository : Repository
            Not used by this class at present; kept so callers keep working.
        """

        self._feature_path = Path(project_path) / ".feature"
        self.path = self._feature_path
        self._feature_filename = "feature.md"
        self._progress_filename = "progress.json"
        self._task_filename = "task.md"

        self._feature_placeholder = """Please replace with your own feature description. Markdown is supported.

Hint:
Improve your prompts by including technical references to any APIs, libraries, components etc that the pre trained model may not know about in detail already."""

        self._task_placeholder = "Please replace with a task description - directing the AI on the first task to implement on this feature"

        # exist_ok avoids the check-then-create race of testing for the
        # directory first.
        os.makedirs(self._feature_path, exist_ok=True)

        super().__init__(self._feature_path)

    def clear_feature(self) -> None:
        """Reset description and task to placeholders and empty the progress."""
        self.set_description(self._feature_placeholder)
        self.clear_task()
        super().__setitem__(self._progress_filename, json.dumps({"done": []}))

    def clear_task(self) -> None:
        """Reset the task file to the placeholder text."""
        self.set_task(self._task_placeholder)

    def get_description(self) -> Union[str, None]:
        """
        Retrieve the content of the feature file.

        Returns
        -------
        Union[str, None]
            The content of the feature file, or None if it does not exist.
        """
        if super().__contains__(self._feature_filename):
            return super().__getitem__(self._feature_filename)

        return None

    def set_description(self, feature_description: str):
        """
        Updates the feature file with new text.

        Parameters
        ----------
        feature_description : str
            The new feature_description to write to the feature file.
        """
        super().__setitem__(self._feature_filename, feature_description)

    def has_description(self) -> bool:
        """
        Does the feature have a description other than the placeholder?
        """
        description = self.get_description()

        return bool(description) and description != self._feature_placeholder

    def get_progress(self) -> Union[dict, None]:
        """
        Retrieve the progress object.

        Returns
        -------
        Union[dict, None]
            The parsed content of the progress file, or None if the file is
            missing or empty.
        """
        if super().__contains__(self._progress_filename):
            json_string = super().__getitem__(self._progress_filename)
            if json_string:
                return json.loads(json_string)

        return None

    def update_progress(self, task: str):
        """
        Updates the progress with a new completed task.

        Parameters
        ----------
        task : str
            The completed task to append to the "done" list.
        """
        # Start a fresh progress object when none has been written yet, so
        # completing a task never fails on a missing progress file.
        progress = self.get_progress() or {}

        progress.setdefault("done", []).append(task)

        json_string = json.dumps(progress, indent=4)

        super().__setitem__(self._progress_filename, json_string)

    def set_task(self, task: str):
        """
        Updates the task file with new text.

        Parameters
        ----------
        task : str
            The new task to write to the task file.
        """
        super().__setitem__(self._task_filename, task)

    def get_task(self) -> Union[str, None]:
        """
        Retrieve the content of the task file.

        Returns
        -------
        Union[str, None]
            The content of the task file, or None if it does not exist.
        """
        if super().__contains__(self._task_filename):
            return super().__getitem__(self._task_filename)

        return None

    def has_task(self) -> bool:
        """
        Does the feature have an active task other than the placeholder?
        """
        task = self.get_task()

        return bool(task) and task != self._task_placeholder

    def complete_task(self):
        """
        Moves the current task to the 'done' list in the progress.json file and clears the task file.
        """
        task = self.get_task()

        if task:
            self.update_progress(task)
            self.set_task("")

    def _file_path(self, filename):
        """Return the path of ``filename`` inside the feature directory."""
        return self._feature_path / filename

    def _open_file_in_editor(self, path):
        """
        Opens the given file with the operating system's default application.
        """

        # Platform-specific methods to open the file
        if platform.system() == "Windows":
            os.startfile(path)
        elif platform.system() == "Darwin":
            subprocess.run(["open", path])
        else:  # Linux and other Unix-like systems
            subprocess.run(["xdg-open", path])

    def open_feature_in_editor(self):
        """
        Opens the feature file in the default system editor.
        """
        self._open_file_in_editor(self._file_path(self._feature_filename))

    def open_task_in_editor(self):
        """
        Opens the task file in the default system editor.
        """
        self._open_file_in_editor(self._file_path(self._task_filename))
value: + paths.append(os.path.join(base_path, key, file)) + else: + subfolder_path = os.path.join(base_path, key) + paths.extend(traverse_tree(value, subfolder_path)) + return paths + + return traverse_tree(tree) + + +def commented_yaml_to_file_selection(commented_content) -> FileSelection: + commented_content_lines = commented_content.split("\n") + uncommented_content_1 = "\n".join( + line.replace("# ", "").replace("#", "") for line in commented_content_lines + ) + uncommented_content_2 = "\n".join( + line.replace("#", "") for line in commented_content_lines + ) + + included_files = tree_to_paths(yaml.safe_load(commented_content)) + try: + all_files = tree_to_paths(yaml.safe_load(uncommented_content_1)) + except: + try: + all_files = tree_to_paths(yaml.safe_load(uncommented_content_2)) + except: + raise ValueError( + "Could not convert the commented yaml to a file selection. Please check the format." + ) + + included_files_not_in_all_files = set(included_files) - set(all_files) + + if len(included_files_not_in_all_files) > 0: + raise ValueError("Yaml file selection has not been read correctly.") + + excluded_files = list(set(all_files) - set(included_files)) + return FileSelection(included_files, excluded_files) + + +def file_selection_to_commented_yaml(selection: FileSelection) -> str: + # Dont worry about commenting lines if they are no excluded files + if not selection.excluded_files: + tree = paths_to_tree(selection.included_files) + + return yaml.dump(tree, sort_keys=False) + + all_files = list(selection.included_files) + list(selection.excluded_files) + + current_tree = paths_to_tree(all_files) + + # Add a # in front of files which are excluded. 
This is a marker for us to go back and properly comment them out + def mark_excluded_files(structure, prefix=""): + if isinstance(structure, dict): + for key, value in structure.items(): + if key == "(./)": + structure[key] = mark_excluded_files(value, prefix) + else: + new_prefix = os.path.join(prefix, key) + structure[key] = mark_excluded_files(value, new_prefix) + elif isinstance(structure, list): + for i, item in enumerate(structure): + full_path = os.path.join(prefix, item) + + if full_path in selection.excluded_files: + structure[i] = f"#{item}" + + return structure + + mark_excluded_files(current_tree) + + content = yaml.dump(current_tree, sort_keys=False) + + # Find all files marked for commenting - add comment and remove the mark. + def comment_marked_files(yaml_content): + lines = yaml_content.split("\n") + + updated_lines = [] + for line in lines: + if "#" in line: + line = line.replace("- '#", "#- ").replace("'", "") + updated_lines.append(line) + + return "\n".join(updated_lines) + + commented_yaml = comment_marked_files(content) + + return commented_yaml + + +class FileSelector: + """ + Manages the active files in a project directory and creates a YAML file listing them. 
+ """ + + def __init__(self, project_path: str, repository: Repository): + self.project_path = project_path + self.ai = AI("gpt-4o", temperature=0) + self.repository = repository + self.yaml_path = Path(project_path) / ".feature" / "files.yml" + + if os.path.exists(self.yaml_path): + return + + print("YAML file is missing or empty, generating YAML...") + + file_selection = FileSelection([], self.repository.get_tracked_files()) + + self.set_to_yaml(file_selection) + + def _write_yaml_with_header(self, yaml_content): + + def add_indentation(content): + lines = content.split("\n") + new_lines = [] + last_key = None + + for line in lines: + stripped_line = line.replace("#", "").strip() + if stripped_line.endswith(":"): + last_key = stripped_line + if stripped_line.startswith("- ") and (last_key != "(./):"): + # add 2 spaces at the begining of line or after any # + + new_lines.append(" " + line) # Add extra indentation + else: + new_lines.append(line) + return "\n".join(new_lines) + + indented_content = add_indentation(yaml_content) + with open(self.yaml_path, "w") as file: + file.write( + f"""# Uncomment any files you would like to use for this feature +# Note that (./) is a special key which represents files at the root of the parent directory + +{indented_content}""" + ) + + def _read_yaml_with_headers(self): + with open(self.yaml_path, "r") as file: + original_content_lines = file.readlines()[3:] + + return "".join(original_content_lines) + + def set_to_yaml(self, file_selection): + + commented_yaml = file_selection_to_commented_yaml(file_selection) + + self._write_yaml_with_header(commented_yaml) + + return + + def update_yaml_from_tracked_files(self): + """ + Updates the YAML file with the current list of tracked files. 
+ """ + + tracked_files = self.repository.get_tracked_files() + + file_selection = self.get_from_yaml() + + # If there are no changes, do nothing + if set(tracked_files) == set( + file_selection.included_files + file_selection.excluded_files + ): + return + + new_included_files = list( + set(tracked_files) - set(file_selection.excluded_files) + ) + + self.set_to_yaml( + FileSelection(new_included_files, file_selection.excluded_files) + ) + + def get_from_yaml(self) -> FileSelection: + """ + Get selected file paths and excluded file paths from yaml + """ + + yaml_content = self._read_yaml_with_headers() + + try: + file_selection = commented_yaml_to_file_selection(yaml_content) + except: + print( + "Could not read the file selection from the YAML file. Attempting to fix with AI" + ) + print(yaml_content) + file_selection = fuzzy_parse_file_selection(self.ai, yaml_content) + self.set_to_yaml(file_selection) + + return file_selection + + def get_pretty_selected_from_yaml(self) -> str: + """ + Retrieves selected file paths from the YAML file and prints them in an ASCII-style tree structure. 
+ """ + # Get selected files from YAML + file_selection = self.get_from_yaml() + + # Helper function to insert a path into the tree dictionary + def insert_path(tree, path_parts): + # Recursively build nested dictionary from path parts + if not path_parts: + return + if path_parts[0] not in tree: + tree[path_parts[0]] = {} + insert_path(tree[path_parts[0]], path_parts[1:]) + + file_tree = {} + for filepath in file_selection.included_files: + parts = filepath.split("/") + insert_path(file_tree, parts) + + # Helper function to format the tree into a string with ASCII graphics + def format_tree(tree, prefix=""): + lines = [] + # Separate directories and files + directories = {k: v for k, v in tree.items() if v} + files = {k: v for k, v in tree.items() if not v} + # Sort items to keep alphabetical order, directories first + items = sorted(directories.items()) + sorted(files.items()) + for i, (key, sub_tree) in enumerate(items): + if i == len(items) - 1: # Last item uses └── + lines.append(prefix + "└── " + key) + extension = " " + else: + lines.append(prefix + "├── " + key) + extension = "│ " + if sub_tree: + lines.extend(format_tree(sub_tree, prefix=prefix + extension)) + return lines + + # Generate formatted tree lines + tree_lines = format_tree(file_tree) + + # Join lines and return as a string + return "\n".join(tree_lines) + + def open_yaml_in_editor(self): + """ + Opens the generated YAML file in the default system editor. + If the YAML file is empty or doesn't exist, generate it first. 
+ """ + + # Platform-specific methods to open the file + if platform.system() == "Windows": + os.startfile(self.yaml_path) + elif platform.system() == "Darwin": + subprocess.run(["open", self.yaml_path]) + else: # Linux and other Unix-like systems + subprocess.run(["xdg-open", self.yaml_path]) + + def get_included_as_file_repository(self): + file_selection = self.get_from_yaml() + + return Files(self.project_path, file_selection.included_files) diff --git a/gpt_engineer/applications/feature_cli/files.py b/gpt_engineer/applications/feature_cli/files.py new file mode 100644 index 0000000000..b1e7347129 --- /dev/null +++ b/gpt_engineer/applications/feature_cli/files.py @@ -0,0 +1,40 @@ +from pathlib import Path + +from gpt_engineer.core.files_dict import FilesDict + + +class Files(FilesDict): + def __init__(self, project_path: str, selected_files: list): + """ + Initialize the Files object by reading the content of the provided file paths. + + Parameters + ---------- + project_path : str + The base path of the project. + selected_files : list + List of file paths relative to the project path. 
+ """ + + self.project_path = project_path + # Convert the list of selected files and their relative directory into a dictionary of relative file paths + content_dict = {} + for file_path in selected_files: + try: + with open( + Path(project_path) / file_path, "r", encoding="utf-8" + ) as content: + content_dict[str(file_path)] = content.read() + except FileNotFoundError: + print(f"Warning: File not found {file_path}") + except UnicodeDecodeError: + print(f"Warning: File not UTF-8 encoded {file_path}, skipping") + super().__init__(content_dict) + + def write_to_disk(self, files: FilesDict): + for name, content in files.items(): + path = Path(self.project_path) / name + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + f.write(content) + return self diff --git a/gpt_engineer/applications/feature_cli/generation_tools.py b/gpt_engineer/applications/feature_cli/generation_tools.py new file mode 100644 index 0000000000..9afabd79f6 --- /dev/null +++ b/gpt_engineer/applications/feature_cli/generation_tools.py @@ -0,0 +1,492 @@ +import xml.etree.ElementTree as ET +import json + +from gpt_engineer.applications.feature_cli.domain import FileSelection +from gpt_engineer.core.ai import AI + +from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler + + +def generate_branch_name(ai: AI, feature_description: str) -> str: + system_prompt = """ + You are a branch name autocomplete / suggestion tool. Based on the users input, please respond with a single suggestion of a branch name and notthing else. 
+ + Example: + + Input: I want to add a login button + Output: feature/login-button + """ + + ai.llm.callbacks.clear() # silent + + messages = ai.start(system_prompt, feature_description, step_name="name-branch") + + ai.llm.callbacks.append(StreamingStdOutCallbackHandler()) + + return messages[-1].content.strip() + + +class TaskResponse: + def __init__(self, planning_thoughts, tasks, closing_remarks): + self.planning_thoughts = planning_thoughts + self.tasks = tasks + self.closing_remarks = closing_remarks + + def __str__(self): + return f"Planning Thoughts: {self.planning_thoughts}\nTasks: {'; '.join(self.tasks)}\nClosing Remarks: {self.closing_remarks}" + + +def parse_task_xml_to_class(xml_data): + # Parse the XML data + root = ET.fromstring(xml_data) + + # Extract the planning thoughts + planning_thoughts = root.find("PlanningThoughts").text.strip() + + # Extract tasks + tasks = [task.text.strip() for task in root.findall(".//Task")] + + # Extract closing remarks + closing_remarks = root.find("ClosingRemarks").text.strip() + + # Create an instance of the response class + response = TaskResponse(planning_thoughts, tasks, closing_remarks) + + return response + + +def build_git_context_string(git_context): + return f"""## Git Context - these are the code changes made so far while implementing this feature. This may include work completed by you on previous tasks as well as changes made independently by me. +### Branch Changes - this is the cumulative diff of all the commits so far on the feature branch. +{git_context.branch_changes} + +### Staged Changes - this is the diff of the current staged changes. +{git_context.staged_changes}""" + + +def build_feature_context_string(feature, git_context): + feature_string = f"""## Feature - this is the description fo the current feature we are working on. +{feature.get_description()} + +## Completed Tasks - these are the lists of tasks you have completed so far on the feature branch. 
+{feature.get_progress()["done"]} +""" + + if git_context: + return f"""{feature_string} + +{build_git_context_string(git_context)} +""" + + return feature_string + + +def build_files_context_string(feature, git_context, files): + return f"""{build_feature_context_string(feature, git_context)} + +## Current Codebase - this is the as is view of the current code base including any unstaged changes. +{files.to_chat()} +""" + + +def generate_suggested_tasks(ai: AI, feature, git_context, files) -> str: + system_prompt = """ +You are a software engineer work planning tool. Given a feature description, a list of tasks already completed, and sections of the code +repository we are working on, suggest a list of implementation tasks to be done in order to move towards the end goal of completing the feature. + +An implementation task consists of actually writing some code - and doesnt include review or research tasks, or any other activity other tha writing code. + +First start by outputting your planning thoughts: an overview of what we are trying to achieve, what we have achieved so far, and what implementation tasks are left to be done. + +Then output the list of between 0 and 3 implementation tasks to be done which get us closer to our goal. Please try to keep the tasks small, actionable and independantly commitable. + +We only need to move towards our goal with these tasks, we dont have to complete the feature in these 3 steps. + +The output format will be XML as follows: + + + + + + + + + + + + + + + + + + + + + +Respond in XML and nothing else. + +You may send as as little as 0 tasks and as many as 3. If you believe the feature is complete, send 0 tasks. 
+""" + + input = build_files_context_string(feature, git_context, files) + + ai.llm.callbacks.clear() # silent + + messages = ai.start(system_prompt, input, step_name="suggest-tasks") + + ai.llm.callbacks.append(StreamingStdOutCallbackHandler()) + + raw_response = messages[-1].content.strip() + + xml_start = raw_response.find("<") + xml_end = raw_response.rfind(">") + 1 + xml = raw_response[xml_start:xml_end] + + try: + resp = parse_task_xml_to_class(xml) + except: + print(raw_response) + + return resp + + +def fuzzy_parse_file_selection(ai: AI, yaml_string: str) -> FileSelection: + # todo: load prompt from ptompts/fuzzy_file_parser + + system_prompt = """## Explanation +You are a fuzzy yaml parser, who correctly parses yaml even if it is not strictly valid. + +A user has been given a yaml representation of a file structure, represented like so: + +.github: + ISSUE_TEMPLATE: + - bug-report.md + - documentation-clarification.md + - feature-request.md + PULL_REQUEST_TEMPLATE: + - PULL_REQUEST_TEMPLATE.md + workflows: + - automation.yml + - ci.yaml + - pre-commit.yaml + - release.yaml + (./): + - CODEOWNERS + - CODE_OF_CONDUCT.md + - CONTRIBUTING.md + - FUNDING.yml + +Folders are represented as keys in a dictionary, files are items in a list. Any files listed under the (./) key can be assumed to be files of the folder above that. 
+ +The given example maps to these file paths: + +".github/ISSUE_TEMPLATE/bug-report.md", +".github/ISSUE_TEMPLATE/documentation-clarification.md", +".github/ISSUE_TEMPLATE/feature-request.md", +".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", +".github/workflows/automation.yml", +".github/workflows/ci.yaml", +".github/workflows/pre-commit.yaml", +".github/workflows/release.yaml", +".github/CODEOWNERS", +".github/CODE_OF_CONDUCT.md", +".github/CONTRIBUTING.md", +".github/FUNDING.yml", + +An example of the yaml file after commenting might be something like this: + + +.github: + # ISSUE_TEMPLATE: + # - bug-report.md + # - documentation-clarification.md + # - feature-request.md + # PULL_REQUEST_TEMPLATE: + # - PULL_REQUEST_TEMPLATE.md + workflows: + - automation.yml + - ci.yaml + - pre-commit.yaml + - release.yaml + # (./): + # - CODEOWNERS + - CODE_OF_CONDUCT.md + - CONTRIBUTING.md + # - FUNDING.yml + + +This would convert into: + +{ + "included_files": [ + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md" + ], + "excluded_files": [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/CODEOWNERS", + ".github/FUNDING.yml" + ] +} + + +Although the commmented content wasnt strictly correct yaml, their intentions were clear. They wanted to retain the files in the workflow folder aswell as the code of conduct and contributing guides + +Based on commented yaml inputs such as this, your job is to output JSON, indicating which files have been included and which have been excluded. + +Excluded files are always commented out with a # like in the above example. 
+ +The json you should return will be like this: + +{ + "included_files": [ + "folder1/file5", + "folder1/folder3/file3", + "file7" + ], + "excluded_files": [ + "folder1/folder2/file1", + "folder1/folder2/file2", + "folder1/folder3/file4", + "folder1/file5", + ] +} + +Files can only be included or excluded, not both. If you are confused about the state of a file make your best guess - and if you really arent sure then mark it as included. + +Respond in JSON and nothing else. + +## Examples + +Example 1: + +Input: + +.github: + ISSUE_TEMPLATE: + - bug_report.md + - feature_request.md + PULL_REQUEST_TEMPLATE: + - pull_request_template.md + # workflows: + # - ci.yml + # - release.yml + +Output: + +{ + "included_files": [ + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/PULL_REQUEST_TEMPLATE/pull_request_template.md" + ], + "excluded_files": [ + ".github/workflows/ci.yml", + ".github/workflows/release.yml" + ] +} + +Example 2: + +Input: + +source: + # controllers: + # - MainController.cs + # - AuthController.cs + models: + - User.cs + - Post.cs + views: + Home: + - Index.cshtml + # - About.cshtml + Auth: + - Login.cshtml + - Register.cshtml + (./): + - Dockerfile + +Output: + +{ + "included_files": [ + "source/models/User.cs", + "source/models/Post.cs", + "source/views/Home/Index.cshtml", + "source/views/Auth/Login.cshtml", + "source/views/Auth/Register.cshtml" + "source/Dockerfile", + ], + "excluded_files": [ + "source/controllers/MainController.cs", + "source/controllers/AuthController.cs", + "source/views/Home/About.cshtml" + ] +} + +Example 3: + +Input: + +src: + main: + java: + com: + example: + # controllers: + # - UserController.java + # - PostController.java + models: + - User.java + - Post.java + # repositories: + # - UserRepository.java + # - PostRepository.java + services: + - UserService.java + - PostService.java + resources: + - application.properties + test: + java: + com: + example: + controllers: + - 
UserControllerTest.java + - PostControllerTest.java + (./): + - pom.xml + - Dockerfile + +Output: + +{ + "included_files": [ + "src/main/java/com/example/models/User.java", + "src/main/java/com/example/models/Post.java", + "src/main/java/com/example/services/UserService.java", + "src/main/java/com/example/services/PostService.java", + "src/main/resources/application.properties", + "src/test/java/com/example/controllers/UserControllerTest.java", + "src/test/java/com/example/controllers/PostControllerTest.java", + "pom.xml", + "Dockerfile" + ], + "excluded_files": [ + "src/main/java/com/example/controllers/UserController.java", + "src/main/java/com/example/controllers/PostController.java", + "src/main/java/com/example/repositories/UserRepository.java", + "src/main/java/com/example/repositories/PostRepository.java" + ] +} + +Example 4: + +Input: + + +app: + # controllers: + # - application_controller.rb + # - users_controller.rb + models: + - user.rb + - post.rb + views: + layouts: + - application.html.erb + users: + - index.html.erb + - show.html.erb + posts: + - index.html.erb + # - show.html.erb + (./): + - Gemfile + - config +config: + environments: + - development.rb + - test.rb + # - production.rb + initializers: + - application_controller_renderer.rb + locales: + - en.yml + # routes.rb +db: + migrate: + - 20211025120523_create_users.rb + - 20211025120530_create_posts.rb +test: + fixtures: + - users.yml + - posts.yml + # controllers: + # - users_controller_test.rb + # - posts_controller_test.rb + models: + - user_test.rb + - post_test.rb + + +Output: + +{ + "included_files": [ + "app/models/user.rb", + "app/models/post.rb", + "app/views/layouts/application.html.erb", + "app/views/users/index.html.erb", + "app/views/users/show.html.erb", + "app/views/posts/index.html.erb", + "app/Gemfile", + "config/environments/development.rb", + "config/environments/test.rb", + "config/initializers/application_controller_renderer.rb", + "config/locales/en.yml", + 
"db/migrate/20211025120523_create_users.rb", + "db/migrate/20211025120530_create_posts.rb", + "test/fixtures/users.yml", + "test/fixtures/posts.yml", + "test/models/user_test.rb", + "test/models/post_test.rb" + ], + "excluded_files": [ + "app/controllers/application_controller.rb", + "app/controllers/users_controller.rb", + "app/views/posts/show.html.erb", + "config/environments/production.rb", + "config/routes.rb", + "test/controllers/users_controller_test.rb", + "test/controllers/posts_controller_test.rb" + ] +} + +## IMPORTANT +Remember any line that is commented is an excluded file. Any line that is NOT commented - is an included file. +""" + + # ai.llm.callbacks.clear() # silent + + messages = ai.start(system_prompt, yaml_string, step_name="fuzzy-parse-yaml") + + # ai.llm.callbacks.append(StreamingStdOutCallbackHandler()) + + json_string = messages[-1].content.strip() + + # strip anything before first { and after last } + json_string = json_string[json_string.find("{") : json_string.rfind("}") + 1] + + data = json.loads(json_string) + + return FileSelection(data["included_files"], data["excluded_files"]) diff --git a/gpt_engineer/applications/feature_cli/main.py b/gpt_engineer/applications/feature_cli/main.py new file mode 100644 index 0000000000..cd5c1a0c6a --- /dev/null +++ b/gpt_engineer/applications/feature_cli/main.py @@ -0,0 +1,125 @@ +import typer +from dotenv import load_dotenv + + +from gpt_engineer.applications.feature_cli.agents.feature_agent import ( + FeatureAgent, +) +from gpt_engineer.applications.feature_cli.agents.chat_agent import ChatAgent +from gpt_engineer.applications.feature_cli.feature import Feature +from gpt_engineer.applications.feature_cli.repository import Repository +from gpt_engineer.applications.feature_cli.domain import Settings +from gpt_engineer.applications.feature_cli.file_selection import FileSelector + + +from gpt_engineer.core.ai import AI + +app = typer.Typer() + + +@app.command() +def feature( + new: bool = 
typer.Option(False, "--new", "-n", help="Initialize a new feature."), + project_path: str = typer.Option(".", "--path", "-p", help="Path to the project."), + model: str = typer.Option("gpt-4o", "--model", "-m", help="Model ID string."), + temperature: float = typer.Option( + 0.1, + "--temperature", + "-t", + help="Controls randomness: lower values for more focused, deterministic outputs.", + ), + verbose: bool = typer.Option( + False, "--verbose", "-v", help="Enable verbose logging for debugging." + ), + debug: bool = typer.Option( + False, "--debug", "-d", help="Enable debug mode for debugging." + ), +): + """ + Handle features in the project. + """ + load_dotenv() + + ai = AI( + model_name=model, + temperature=temperature, + ) + + repository = Repository(project_path) + + feature = Feature(project_path, repository) + + file_selector = FileSelector(project_path, repository) + + agent = FeatureAgent(ai, project_path, feature, repository, file_selector) + + if new: + agent.initialize_feature() + else: + agent.update_feature() + + +@app.command() +def chat( + project_path: str = typer.Option(".", "--path", "-p", help="Path to the project."), + model: str = typer.Option("gpt-4o", "--model", "-m", help="Model ID string."), + temperature: float = typer.Option( + 0.8, + "--temperature", + "-t", + help="Controls randomness: lower values for more focused, deterministic outputs.", + ), +): + """ + Initiate a chat about the current repository and feature context + """ + ai = AI( + model_name=model, + temperature=temperature, + ) + + repository = Repository(project_path) + + feature = Feature(project_path, repository) + + file_selector = FileSelector(project_path, repository) + + chat_agent = ChatAgent(ai, project_path, feature, repository, file_selector) + + chat_agent.start() + + +if __name__ == "__main__": + app() + + +@app.command() +def task( + project_path: str = typer.Option(".", "--path", "-p", help="Path to the project."), + model: str = typer.Option("gpt-4o", 
"--model", "-m", help="Model ID string."), + temperature: float = typer.Option( + 0.1, + "--temperature", + "-t", + help="Controls randomness: lower values for more focused, deterministic outputs.", + ), +): + """ + Implement a simple one off task without feature context + """ + load_dotenv() + + ai = AI( + model_name=model, + temperature=temperature, + ) + + repository = Repository(project_path) + + feature = Feature(project_path, repository) + + file_selector = FileSelector(project_path, repository) + + agent = FeatureAgent(ai, project_path, feature, repository, file_selector) + + agent.run_task() diff --git a/gpt_engineer/applications/feature_cli/prompts/__init__.py b/gpt_engineer/applications/feature_cli/prompts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/gpt_engineer/applications/feature_cli/prompts/fuzzy_file_parser b/gpt_engineer/applications/feature_cli/prompts/fuzzy_file_parser new file mode 100644 index 0000000000..75200759b1 --- /dev/null +++ b/gpt_engineer/applications/feature_cli/prompts/fuzzy_file_parser @@ -0,0 +1,320 @@ +## Explanation +You are a fuzzy yaml parser, who correctly parses yaml even if it is not strictly valid. + +A user has been given a yaml representation of a file structure, represented like so: + +.github: + ISSUE_TEMPLATE: + - bug-report.md + - documentation-clarification.md + - feature-request.md + PULL_REQUEST_TEMPLATE: + - PULL_REQUEST_TEMPLATE.md + workflows: + - automation.yml + - ci.yaml + - pre-commit.yaml + - release.yaml + (./): + - CODEOWNERS + - CODE_OF_CONDUCT.md + - CONTRIBUTING.md + - FUNDING.yml + +Folders are represented as keys in a dictionary, files are items in a list. Any files listed under the (./) key can be assumed to be files of the folder above that. 
+ +The given example maps to these file paths: + +".github/ISSUE_TEMPLATE/bug-report.md", +".github/ISSUE_TEMPLATE/documentation-clarification.md", +".github/ISSUE_TEMPLATE/feature-request.md", +".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", +".github/workflows/automation.yml", +".github/workflows/ci.yaml", +".github/workflows/pre-commit.yaml", +".github/workflows/release.yaml", +".github/CODEOWNERS", +".github/CODE_OF_CONDUCT.md", +".github/CONTRIBUTING.md", +".github/FUNDING.yml", + +An example of the yaml file after commenting might be something like this: + + +.github: + # ISSUE_TEMPLATE: + # - bug-report.md + # - documentation-clarification.md + # - feature-request.md + # PULL_REQUEST_TEMPLATE: + # - PULL_REQUEST_TEMPLATE.md + workflows: + - automation.yml + - ci.yaml + - pre-commit.yaml + - release.yaml + # (./): + # - CODEOWNERS + - CODE_OF_CONDUCT.md + - CONTRIBUTING.md + # - FUNDING.yml + + +This would convert into: + +{ + "included_files": [ + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md" + ], + "excluded_files": [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/CODEOWNERS", + ".github/FUNDING.yml" + ] +} + + +Although the commented content wasn't strictly correct yaml, their intentions were clear. They wanted to retain the files in the workflows folder as well as the code of conduct and contributing guides. + +Based on commented yaml inputs such as this, your job is to output JSON, indicating which files have been included and which have been excluded. + +Excluded files are always commented out with a # like in the above example. 
+ +The json you should return will be like this: + +{ + "included_files": [ + "folder1/file5", + "folder1/folder3/file3", + "file7" + ], + "excluded_files": [ + "folder1/folder2/file1", + "folder1/folder2/file2", + "folder1/folder3/file4", + "folder1/file6" + ] +} + +Files can only be included or excluded, not both. If you are confused about the state of a file make your best guess - and if you really aren't sure then mark it as included. + +Respond in JSON and nothing else. + +## Examples + +Example 1: + +Input: + +.github: + ISSUE_TEMPLATE: + - bug_report.md + - feature_request.md + PULL_REQUEST_TEMPLATE: + - pull_request_template.md + # workflows: + # - ci.yml + # - release.yml + +Output: + +{ + "included_files": [ + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/PULL_REQUEST_TEMPLATE/pull_request_template.md" + ], + "excluded_files": [ + ".github/workflows/ci.yml", + ".github/workflows/release.yml" + ] +} + +Example 2: + +Input: + +source: + # controllers: + # - MainController.cs + # - AuthController.cs + models: + - User.cs + - Post.cs + views: + Home: + - Index.cshtml + # - About.cshtml + Auth: + - Login.cshtml + - Register.cshtml + (./): + - Dockerfile + +Output: + +{ + "included_files": [ + "source/models/User.cs", + "source/models/Post.cs", + "source/views/Home/Index.cshtml", + "source/views/Auth/Login.cshtml", + "source/views/Auth/Register.cshtml", + "source/Dockerfile" + ], + "excluded_files": [ + "source/controllers/MainController.cs", + "source/controllers/AuthController.cs", + "source/views/Home/About.cshtml" + ] +} + +Example 3: + +Input: + +src: + main: + java: + com: + example: + # controllers: + # - UserController.java + # - PostController.java + models: + - User.java + - Post.java + # repositories: + # - UserRepository.java + # - PostRepository.java + services: + - UserService.java + - PostService.java + resources: + - application.properties + test: + java: + com: + example: + controllers: + - 
UserControllerTest.java + - PostControllerTest.java + (./): + - pom.xml + - Dockerfile + +Output: + +{ + "included_files": [ + "src/main/java/com/example/models/User.java", + "src/main/java/com/example/models/Post.java", + "src/main/java/com/example/services/UserService.java", + "src/main/java/com/example/services/PostService.java", + "src/main/resources/application.properties", + "src/test/java/com/example/controllers/UserControllerTest.java", + "src/test/java/com/example/controllers/PostControllerTest.java", + "pom.xml", + "Dockerfile" + ], + "excluded_files": [ + "src/main/java/com/example/controllers/UserController.java", + "src/main/java/com/example/controllers/PostController.java", + "src/main/java/com/example/repositories/UserRepository.java", + "src/main/java/com/example/repositories/PostRepository.java" + ] +} + +Example 4: + +Input: + + +app: + # controllers: + # - application_controller.rb + # - users_controller.rb + models: + - user.rb + - post.rb + views: + layouts: + - application.html.erb + users: + - index.html.erb + - show.html.erb + posts: + - index.html.erb + # - show.html.erb + (./): + - Gemfile + - config +config: + environments: + - development.rb + - test.rb + # - production.rb + initializers: + - application_controller_renderer.rb + locales: + - en.yml + # routes.rb +db: + migrate: + - 20211025120523_create_users.rb + - 20211025120530_create_posts.rb +test: + fixtures: + - users.yml + - posts.yml + # controllers: + # - users_controller_test.rb + # - posts_controller_test.rb + models: + - user_test.rb + - post_test.rb + + +Output: + +{ + "included_files": [ + "app/models/user.rb", + "app/models/post.rb", + "app/views/layouts/application.html.erb", + "app/views/users/index.html.erb", + "app/views/users/show.html.erb", + "app/views/posts/index.html.erb", + "app/Gemfile", + "config/environments/development.rb", + "config/environments/test.rb", + "config/initializers/application_controller_renderer.rb", + "config/locales/en.yml", + 
"db/migrate/20211025120523_create_users.rb", + "db/migrate/20211025120530_create_posts.rb", + "test/fixtures/users.yml", + "test/fixtures/posts.yml", + "test/models/user_test.rb", + "test/models/post_test.rb" + ], + "excluded_files": [ + "app/controllers/application_controller.rb", + "app/controllers/users_controller.rb", + "app/views/posts/show.html.erb", + "config/environments/production.rb", + "config/routes.rb", + "test/controllers/users_controller_test.rb", + "test/controllers/posts_controller_test.rb" + ] +} + +## IMPORTANT +Remember any line that is commented is an excluded file. Any line that is NOT commented - is an included file. \ No newline at end of file diff --git a/gpt_engineer/applications/feature_cli/repository.py b/gpt_engineer/applications/feature_cli/repository.py new file mode 100644 index 0000000000..f6d7d9c054 --- /dev/null +++ b/gpt_engineer/applications/feature_cli/repository.py @@ -0,0 +1,151 @@ +from dataclasses import dataclass +from typing import List + +from git import GitCommandError, Repo + + +@dataclass +class Commit: + """ + Represents a single Git commit with a description and a diff. + """ + + description: str + diff: str + + def __str__(self) -> str: + diff_str = "\n".join(str(d) for d in self.diff) + return f"Commit Description: {self.description}\nDiff:\n{diff_str}" + + +@dataclass +class GitContext: + """ + Represents the Git context of an in progress feature. + """ + + commits: List[Commit] + branch_changes: str + staged_changes: str + unstaged_changes: str + tracked_files: List[str] + + +class Repository: + """ + Manages a git repository, providing functionalities to get repo status, + list files considering .gitignore, and interact with repository history. + """ + + def __init__(self, repo_path: str): + self.repo_path = repo_path + self.repo = Repo(repo_path) + assert not self.repo.bare + + def get_tracked_files(self) -> List[str]: + """ + List all files that are currently tracked by Git in the repository. 
+ """ + try: + tracked_files = self.repo.git.ls_files().split("\n") + return tracked_files + except GitCommandError as e: + print(f"Error listing tracked files: {e}") + return [] + + def get_feature_branch_diff(self): + """ + Get a consolidated diff for the entire feature branch from its divergence point. + + Returns: + - str: The diff representing all changes from the feature branch since its divergence. + """ + current_branch = self.repo.active_branch + + # Get the tracking branch (e.g., 'origin/master') + tracking_branch = current_branch.tracking_branch() + if tracking_branch is None: + print("No tracking branch set, using 'master' as default base branch.") + tracking_branch = self.repo.heads.master # Fallback to 'master' + + try: + # Find the merge base between the current branch and the tracking branch or master + merge_base = self.repo.merge_base(tracking_branch, current_branch) + if merge_base: + merge_base = merge_base[ + 0 + ] # GitPython might return a list of merge bases + + # Generate the diff from the merge base to the latest commit of the feature branch + feature_diff = self.repo.git.diff( + f"{merge_base}..{current_branch}", unified=0 + ) + return feature_diff + except GitCommandError as e: + print(f"Error generating diff: {e}") + return "" + + def get_unstaged_changes(self): + """ + Get the unstaged changes in the repository. + + Returns + ------- + str + The unstaged changes in the repository. 
+ """ + return self.repo.git.diff() + + def get_git_context(self): + staged_changes = self.repo.git.diff("--cached") + unstaged_changes = self.repo.git.diff() + current_branch = self.repo.active_branch + + commits = list(self.repo.iter_commits(rev=current_branch.name)) + + commit_objects = [ + Commit( + commit.summary, + ( + commit.diff(commit.parents[0], create_patch=True) + if commit.parents + else commit.diff(None, create_patch=True) + ), + ) + for commit in commits + ] + + branch_changes = self.get_feature_branch_diff() + + tracked_files = self.get_tracked_files() + + return GitContext( + commit_objects, + branch_changes, + staged_changes, + unstaged_changes, + tracked_files, + ) + + def create_branch(self, branch_name): + """ + Create a new branch in the repository. + + Parameters + ---------- + branch_name : str + The name of the new branch. + """ + self.repo.git.checkout("-b", branch_name) + + def stage_all_changes(self): + """ + Stage all changes in the repository. + """ + self.repo.git.add("--all") + + def undo_unstaged_changes(self): + """ + Undo all unstaged changes in the repository. 
+ """ + self.repo.git.checkout("--", ".") diff --git a/gpt_engineer/applications/feature_cli/task.py b/gpt_engineer/applications/feature_cli/task.py new file mode 100644 index 0000000000..41b422212b --- /dev/null +++ b/gpt_engineer/applications/feature_cli/task.py @@ -0,0 +1,75 @@ +import os +import platform +import subprocess +import shutil +from pathlib import Path +from typing import Union + +from gpt_engineer.core.default.disk_memory import DiskMemory +from gpt_engineer.core.default.paths import memory_path + + +class Task(DiskMemory): + """ + Represents a task that will be done one off without the wider context of a feature + """ + + def __init__(self, project_path: Union[str, Path]): + + self._task_path = Path(memory_path(project_path)) / "task" + self.path = self._task_path + self._task_filename = "task.md" + self._files_filename = "files.yml" + + if not os.path.exists(self._task_path): + os.makedirs(self._task_path) + + self.set_task("Please replace with task description") + + super().__init__(self._task_path) + + def delete(self): + shutil.rmtree(self._task_path) + + def set_task(self, task: str): + """ + Updates the task file with new text. + Parameters + ---------- + task : str + The new task to write to the feature file. + """ + super().__setitem__(self._task_filename, task) + + def get_task(self) -> str: + """ + Retrieve the content of the task file in the database. + Returns + ------- + str + The content of the feature file. + """ + return super().__getitem__(self._task_filename) + + def _file_path(self, filename): + return self._task_path / filename + + def _open_file_in_editor(self, path): + """ + Opens the generated YAML file in the default system editor. + If the YAML file is empty or doesn't exist, generate it first. 
+ """ + + # Platform-specific methods to open the file + if platform.system() == "Windows": + os.startfile(path) + elif platform.system() == "Darwin": + subprocess.run(["open", path]) + else: # Linux and other Unix-like systems + subprocess.run(["xdg-open", path]) + + def open_task_in_editor(self): + """ + Opens the task file in the default system editor. + """ + self._open_file_in_editor(self._file_path(self._task_filename)) diff --git a/gpt_engineer/core/ai.py b/gpt_engineer/core/ai.py index 5db26eb439..8672fe8a8b 100644 --- a/gpt_engineer/core/ai.py +++ b/gpt_engineer/core/ai.py @@ -87,7 +87,7 @@ class AI: def __init__( self, - model_name="gpt-4-turbo", + model_name="gpt-4o", temperature=0.1, azure_endpoint=None, streaming=True, diff --git a/gpt_engineer/core/default/disk_memory.py b/gpt_engineer/core/default/disk_memory.py index 62c7daf32b..e1936fc8c4 100644 --- a/gpt_engineer/core/default/disk_memory.py +++ b/gpt_engineer/core/default/disk_memory.py @@ -171,6 +171,9 @@ def __setitem__(self, key: Union[str, Path], val: str) -> None: full_path.write_text(val, encoding="utf-8") + def set(self, key: Union[str, Path], val: str) -> None: + return self.__setitem__(key, val) + def __delitem__(self, key: Union[str, Path]) -> None: """ Delete a file or directory from the database corresponding to the given key. 
diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py index d778948b65..db06bfcc3c 100644 --- a/gpt_engineer/core/default/steps.py +++ b/gpt_engineer/core/default/steps.py @@ -37,7 +37,7 @@ import traceback from pathlib import Path -from typing import List, MutableMapping, Union +from typing import List, MutableMapping, Optional, Union from langchain.schema import HumanMessage, SystemMessage from termcolor import colored @@ -274,6 +274,7 @@ def improve_fn( files_dict: FilesDict, memory: BaseMemory, preprompts_holder: PrepromptsHolder, + additional_context: Optional[str] = None, ) -> FilesDict: """ Improves the code based on user input and returns the updated files. @@ -290,6 +291,8 @@ def improve_fn( The memory interface where the code and related data are stored. preprompts_holder : PrepromptsHolder The holder for preprompt messages that guide the AI model. + additional_context :str + Optional additional context to provide to the AI as part of the request Returns ------- @@ -301,7 +304,9 @@ def improve_fn( SystemMessage(content=setup_sys_prompt_existing_code(preprompts)), ] - # Add files as input + if additional_context: + messages.append(HumanMessage(content=additional_context)) + messages.append(HumanMessage(content=f"{files_dict.to_chat()}")) messages.append(HumanMessage(content=prompt.to_langchain_content())) memory.log( @@ -370,13 +375,13 @@ def flush(self): file.flush() -def handle_improve_mode(prompt, agent, memory, files_dict): +def handle_improve_mode(improve_lambda, memory): captured_output = io.StringIO() old_stdout = sys.stdout sys.stdout = Tee(sys.stdout, captured_output) try: - files_dict = agent.improve(files_dict, prompt) + files_dict = improve_lambda() except Exception as e: print( f"Error while improving the project: {e}\nCould you please upload the debug_log_file.txt in {memory.path}/logs folder to github?\nFULL STACK TRACE:\n" @@ -388,7 +393,6 @@ def handle_improve_mode(prompt, agent, memory, files_dict): # Get 
the captured output captured_string = captured_output.getvalue() - print(captured_string) memory.log(DEBUG_LOG_FILE, "\nCONSOLE OUTPUT:\n" + captured_string) return files_dict diff --git a/gpt_engineer/core/prompt.py b/gpt_engineer/core/prompt.py index 4d8286343c..85e2e10d75 100644 --- a/gpt_engineer/core/prompt.py +++ b/gpt_engineer/core/prompt.py @@ -9,16 +9,18 @@ def __init__( text: str, image_urls: Optional[Dict[str, str]] = None, entrypoint_prompt: str = "", + prefix: Optional[str] = "Request: ", ): self.text = text self.image_urls = image_urls self.entrypoint_prompt = entrypoint_prompt + self.prefix = prefix def __repr__(self): return f"Prompt(text={self.text!r}, image_urls={self.image_urls!r})" - def to_langchain_content(self): - content = [{"type": "text", "text": f"Request: {self.text}"}] + def to_langchain_content(self) -> Dict[str, str]: + content = [{"type": "text", "text": f"{self.prefix}{self.text}"}] if self.image_urls: for name, url in self.image_urls.items(): diff --git a/gpt_engineer/core/token_usage.py b/gpt_engineer/core/token_usage.py index b10fec9033..b51b937e31 100644 --- a/gpt_engineer/core/token_usage.py +++ b/gpt_engineer/core/token_usage.py @@ -71,11 +71,11 @@ class Tokenizer: def __init__(self, model_name): self.model_name = model_name - self._tiktoken_tokenizer = ( - tiktoken.encoding_for_model(model_name) - if "gpt-4" in model_name or "gpt-3.5" in model_name - else tiktoken.get_encoding("cl100k_base") - ) + + try: + self._tiktoken_tokenizer = tiktoken.encoding_for_model(model_name) + except: + self._tiktoken_tokenizer = tiktoken.get_encoding("cl100k_base") def num_tokens(self, txt: str) -> int: """ diff --git a/pyproject.toml b/pyproject.toml index bb229c8c09..2e5d3a38b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ regex = "^2023.12.25" pillow = "^10.2.0" datasets = "^2.17.1" black = "23.3.0" +gitpython = "^3.0.0" langchain-community = "^0.2.0" [tool.poetry.group.dev.dependencies] @@ -67,6 +68,7 @@ 
sphinx_copybutton = ">=0.5.2" gpt-engineer = 'gpt_engineer.applications.cli.main:app' ge = 'gpt_engineer.applications.cli.main:app' gpte = 'gpt_engineer.applications.cli.main:app' +gptf = 'gpt_engineer.applications.feature_cli.main:app' bench = 'gpt_engineer.benchmark.__main__:app' gpte_test_application = 'tests.caching_main:app' diff --git a/quicktest.py b/quicktest.py new file mode 100644 index 0000000000..0f2a445d4e --- /dev/null +++ b/quicktest.py @@ -0,0 +1,66 @@ +import os +import yaml +from collections import defaultdict + + +def paths_to_yaml(paths): + def nested_dict(): + return defaultdict(nested_dict) + + tree = nested_dict() + + for path in paths: + parts = path.split(os.sep) + file = parts.pop() + d = tree + for part in parts: + d = d[part] + if "/" not in d: + d["/"] = [] + d["/"].append(file) + + def default_to_regular(d): + if isinstance(d, defaultdict): + d = {k: default_to_regular(v) for k, v in d.items()} + return d + + tree = default_to_regular(tree) + + return yaml.dump(tree, sort_keys=False) + + +def yaml_to_paths(yaml_content): + def traverse_tree(tree, base_path=""): + paths = [] + for key, value in tree.items(): + if key == "./": + for file in value: + paths.append(os.path.join(base_path, file)) + else: + subfolder_path = os.path.join(base_path, key) + paths.extend(traverse_tree(value, subfolder_path)) + return paths + + tree = yaml.safe_load(yaml_content) + return traverse_tree(tree) + + +# Example usage +yaml_content = """ +folder: + ./: + # - file1.txt + - file2.txt + subfolder: + ./: + - file3.txt +""" + +paths = yaml_to_paths(yaml_content) +print(paths) + + +# paths = ["folder/file1.txt", "folder/file2.txt", "folder/subfolder/file3.txt"] + +# yaml_output = paths_to_yaml(paths) +# print(yaml_output) diff --git a/tests/applications/feature_cli/__init__.py b/tests/applications/feature_cli/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/applications/feature_cli/test_file_selection.py 
b/tests/applications/feature_cli/test_file_selection.py new file mode 100644 index 0000000000..2e9cd514fa --- /dev/null +++ b/tests/applications/feature_cli/test_file_selection.py @@ -0,0 +1,203 @@ +import yaml +import pytest +from dotenv import load_dotenv + +from gpt_engineer.core.ai import AI + +from gpt_engineer.applications.feature_cli.file_selection import ( + FileSelection, + paths_to_tree, + tree_to_paths, + paths_to_tree, + file_selection_to_commented_yaml, + commented_yaml_to_file_selection, +) + +from gpt_engineer.applications.feature_cli.generation_tools import ( + fuzzy_parse_file_selection, +) + + +def test_tree_conversion(): + original_paths = [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODEOWNERS", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md", + ".github/FUNDING.yml", + "docker/Dockerfile", + "docker/README.md", + "docker/entrypoint.sh", + "docs/examples/open_llms/README.md", + "docs/examples/open_llms/langchain_interface.py", + ] + + tree = paths_to_tree(original_paths) + reconstructed_paths = tree_to_paths(tree) + + assert sorted(original_paths) == sorted( + reconstructed_paths + ), "The file paths do not match after conversion!" 
+ + +def test_tree_conversion_yaml(): + original_paths = [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODEOWNERS", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md", + ".github/FUNDING.yml", + "docker/Dockerfile", + "docker/README.md", + "docker/entrypoint.sh", + "docs/examples/open_llms/README.md", + "docs/examples/open_llms/langchain_interface.py", + ] + + tree = paths_to_tree(original_paths) + yaml_tree = yaml.dump(tree) + read_tree = yaml.safe_load(yaml_tree) + reconstructed_paths = tree_to_paths(read_tree) + + assert sorted(original_paths) == sorted( + reconstructed_paths + ), "The file paths do not match after conversion!" + + +def test_file_selection_to_yaml(): + included_files = [ + "docker/Dockerfile", + "docker/README.md", + "docker/entrypoint.sh", + ] + + excluded_files = [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODEOWNERS", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md", + ".github/FUNDING.yml", + "docs/examples/open_llms/README.md", + "docs/examples/open_llms/langchain_interface.py", + ] + + commented_yaml = file_selection_to_commented_yaml( + FileSelection(included_files, excluded_files) + ) + + assert ( + commented_yaml + == """.github: + ISSUE_TEMPLATE: +# - bug-report.md +# - documentation-clarification.md +# - feature-request.md + PULL_REQUEST_TEMPLATE: +# - PULL_REQUEST_TEMPLATE.md + 
workflows: +# - automation.yml +# - ci.yaml +# - pre-commit.yaml +# - release.yaml + (./): +# - CODEOWNERS +# - CODE_OF_CONDUCT.md +# - CONTRIBUTING.md +# - FUNDING.yml +docker: +- Dockerfile +- README.md +- entrypoint.sh +docs: + examples: + open_llms: +# - README.md +# - langchain_interface.py +""" + ) + + +def test_yaml_to_file_selection(): + included_files = [ + "docker/Dockerfile", + "docker/README.md", + "docker/entrypoint.sh", + ] + + excluded_files = [ + ".github/ISSUE_TEMPLATE/bug-report.md", + ".github/ISSUE_TEMPLATE/documentation-clarification.md", + ".github/ISSUE_TEMPLATE/feature-request.md", + ".github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md", + ".github/workflows/automation.yml", + ".github/workflows/ci.yaml", + ".github/workflows/pre-commit.yaml", + ".github/workflows/release.yaml", + ".github/CODEOWNERS", + ".github/CODE_OF_CONDUCT.md", + ".github/CONTRIBUTING.md", + ".github/FUNDING.yml", + "docs/examples/open_llms/README.md", + "docs/examples/open_llms/langchain_interface.py", + ] + + commented_yaml = file_selection_to_commented_yaml( + FileSelection(included_files, excluded_files) + ) + + file_selection = commented_yaml_to_file_selection(commented_yaml) + + assert sorted(file_selection.included_files) == sorted(included_files) + assert sorted(file_selection.excluded_files) == sorted(excluded_files) + + +@pytest.mark.skip(reason="Skipping as test requires AI") +def test_yaml_to_file_selection_fuzzy(): + + load_dotenv() + + commented_yaml = """# gpt_engineer: +# applications: +# cli: + - __init__.py + - cli_agent.py +# - collect.py + - file_selector.py + - learning.py + - main.py""" + + file_selction = fuzzy_parse_file_selection(AI(), commented_yaml) + + assert file_selction == FileSelection( + [ + "gpt_engineer/applications/cli/__init__.py", + "gpt_engineer/applications/cli/cli_agent.py", + "gpt_engineer/applications/cli/file_selector.py", + "gpt_engineer/applications/cli/learning.py", + "gpt_engineer/applications/cli/main.py", + ], + [ + 
"gpt_engineer/applications/cli/collect.py", + ], + )