feat: USE_FULL_PROJECT_PROMPT + extracting code from ``` #105

Open · wants to merge 9 commits into main
26 changes: 24 additions & 2 deletions constants.py
@@ -1,4 +1,26 @@
import os

EXTENSION_TO_SKIP = [".png",".jpg",".jpeg",".gif",".bmp",".svg",".ico",".tif",".tiff"]
DEFAULT_DIR = "generated"
DEFAULT_MODEL = "gpt-3.5-turbo" # we recommend 'gpt-4' if you have it # gpt3.5 is going to be worse at generating code so we strongly recommend gpt4. i know most people dont have access, we are working on a hosted version
DEFAULT_MAX_TOKENS = 2000 # i wonder how to tweak this properly. we dont want it to be max length as it encourages verbosity of code. but too short and code also truncates suddenly.

try:
    USE_FULL_PROJECT_PROMPT = os.environ["USE_FULL_PROJECT_PROMPT"] not in ("", "0", "False")
except KeyError:
    # when enabled, each file-generation prompt also includes every file generated so far.
    # this makes the generated code much more consistent, but requires at least a 16k-context
    # model even for a small project.
    USE_FULL_PROJECT_PROMPT = False

print(f"USE_FULL_PROJECT_PROMPT={USE_FULL_PROJECT_PROMPT}")

# https://platform.openai.com/docs/models/gpt-4
try:
    DEFAULT_MODEL = os.environ["OPENAI_DEFAULT_MODEL"]
except KeyError:
    # we recommend 'gpt-4' if you have access; gpt-3.5 is noticeably worse at generating code.
    # most people don't have gpt-4 access yet, and a hosted version is in the works.
    DEFAULT_MODEL = "gpt-3.5-turbo"

try:
    DEFAULT_MAX_TOKENS = int(os.environ["OPENAI_DEFAULT_MAX_TOKENS"])
except KeyError:
    # tricky to tune: a high cap encourages verbose code, but too low a cap truncates files mid-code.
    DEFAULT_MAX_TOKENS = 2000
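
For reference, a minimal sketch of how these variables might be configured (hypothetical values, not part of this diff); they must be set before constants.py is imported:

import os

# hypothetical values; any unset variable falls back to the defaults above
os.environ["USE_FULL_PROJECT_PROMPT"] = "1"       # "", "0", or "False" leave the flag disabled
os.environ["OPENAI_DEFAULT_MODEL"] = "gpt-4"
os.environ["OPENAI_DEFAULT_MAX_TOKENS"] = "4000"  # "0" omits max_tokens from the API call entirely

import constants
assert constants.USE_FULL_PROJECT_PROMPT is True
assert constants.DEFAULT_MODEL == "gpt-4"
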
38 changes: 29 additions & 9 deletions main.py
@@ -1,8 +1,9 @@
import os
import re
import modal
import ast
from utils import clean_dir
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT

stub = modal.Stub("smol-developer-v1") # yes we are recommending using Modal by default, as it helps with deployment. see readme for why.
openai_image = modal.Image.debian_slim().pip_install("openai", "tiktoken")
@@ -47,10 +48,12 @@ def reportTokens(prompt):
params = {
    "model": model,
    "messages": messages,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "temperature": 0,
}

if DEFAULT_MAX_TOKENS != 0:
    params["max_tokens"] = DEFAULT_MAX_TOKENS

# Send the API request
response = openai.ChatCompletion.create(**params)

@@ -60,17 +63,18 @@ def reportTokens(prompt):


@stub.function()
def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None):
def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None):
# call openai api with this prompt
filecode = generate_response.call(model,
f"""You are an AI developer who is trying to write a program that will generate code for the user based on their intent.

the app is: {prompt}

the files we have decided to generate are: {filepaths_string}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
(f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
f"""
only write valid code for the given filepath and file type, and return only the code.
do not add any other explanation, only return valid code for that file type.
""",
@@ -97,7 +101,7 @@ def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_d
""",
)

return filename, filecode
return filename, get_code_from_string(filecode)


@stub.local_entrypoint()
@@ -163,17 +167,33 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
print(shared_dependencies)
# write shared dependencies as a md file inside the generated directory
write_file("shared_dependencies.md", shared_dependencies, directory)

generated_files_content = ""
# Iterate over generated files and write them to the specified directory
for filename, filecode in generate_file.map(
list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt)
list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt, generated_files_content=generated_files_content)
):
write_file(filename, filecode, directory)
generated_files_content += f"{directory}/{filename}\n"
generated_files_content += "\n"
generated_files_content += filecode
generated_files_content += "\n"


except ValueError:
    print("Failed to parse result")

# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ```
def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    if match:
        return match.group(1)
    else:
        return input_string

def write_file(filename, filecode, directory):
# Output the filename in blue color
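
As a sanity check, here is how the fence-stripping behaves on the kind of output described above (a standalone sketch that re-declares the helper, so it runs outside this repo):

import re

def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    return match.group(1) if match else input_string

wrapped = "# Makefile\n```makefile\nall:\n\techo done\n```"
assert get_code_from_string(wrapped) == "all:\n\techo done"
# responses without a fenced block pass through unchanged
assert get_code_from_string("print('hello')") == "print('hello')"
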
36 changes: 29 additions & 7 deletions main_no_modal.py
@@ -1,9 +1,10 @@
import sys
import os
import re
import ast
from time import sleep
from utils import clean_dir
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT

def generate_response(system_prompt, user_prompt, *args):
import openai
@@ -40,10 +41,12 @@ def reportTokens(prompt):
params = {
    "model": DEFAULT_MODEL,
    "messages": messages,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "temperature": 0,
}

if DEFAULT_MAX_TOKENS != 0:
    params["max_tokens"] = DEFAULT_MAX_TOKENS

# Send the API request
keep_trying = True
while keep_trying:
@@ -62,7 +65,7 @@ def reportTokens(prompt):


def generate_file(
    filename, filepaths_string=None, shared_dependencies=None, prompt=None
    filename, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None
):
# call openai api with this prompt
filecode = generate_response(
@@ -72,8 +75,9 @@ def generate_file(

the files we have decided to generate are: {filepaths_string}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
(f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
f"""
only write valid code for the given filepath and file type, and return only the code.
do not add any other explanation, only return valid code for that file type.
""",
@@ -100,7 +104,7 @@ def generate_file(
""",
)

return filename, filecode
return filename, get_code_from_string(filecode)


def main(prompt, directory=DEFAULT_DIR, file=None):
@@ -174,19 +178,37 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
print(shared_dependencies)
# write shared dependencies as a md file inside the generated directory
write_file("shared_dependencies.md", shared_dependencies, directory)

generated_files_content = ""
for name in list_actual:
filename, filecode = generate_file(
name,
filepaths_string=filepaths_string,
shared_dependencies=shared_dependencies,
prompt=prompt,
generated_files_content=generated_files_content,
)
write_file(filename, filecode, directory)
generated_files_content += f"{directory}/{filename}\n"
generated_files_content += "\n"
generated_files_content += filecode
generated_files_content += "\n"


except ValueError:
    print("Failed to parse result: " + result)

# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ```
def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    if match:
        return match.group(1)
    else:
        return input_string

def write_file(filename, filecode, directory):
# Output the filename in blue color
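
For illustration, a standalone sketch of what the accumulated context looks like after two files (hypothetical file names and contents, mirroring the four += statements in main()):

directory = "generated"
generated_files_content = ""
for filename, filecode in [("index.html", "<html>hello</html>"), ("app.js", "console.log('hi');")]:
    # per file: path, blank line, code, trailing newline
    generated_files_content += f"{directory}/{filename}\n\n{filecode}\n"

# later prompts then receive:
# generated/index.html
#
# <html>hello</html>
# generated/app.js
#
# console.log('hi');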