feat: USE_FULL_PROJECT_PROMPT + extracting code from ``` #105

Open · wants to merge 9 commits into main
26 changes: 24 additions & 2 deletions constants.py
@@ -1,4 +1,26 @@
import os

EXTENSION_TO_SKIP = [".png",".jpg",".jpeg",".gif",".bmp",".svg",".ico",".tif",".tiff"]
DEFAULT_DIR = "generated"
DEFAULT_MODEL = "gpt-3.5-turbo" # we recommend 'gpt-4' if you have it # gpt3.5 is going to be worse at generating code so we strongly recommend gpt4. i know most people dont have access, we are working on a hosted version
DEFAULT_MAX_TOKENS = 2000 # i wonder how to tweak this properly. we dont want it to be max length as it encourages verbosity of code. but too short and code also truncates suddenly.

try:
    USE_FULL_PROJECT_PROMPT = os.environ["USE_FULL_PROJECT_PROMPT"] not in ("", "0", "False")
except KeyError:
    # when enabled, each file-generation prompt also includes every file generated so far.
    # this makes the generated code much more consistent, but requires at least a 16k-context
    # model even for a small project.
    USE_FULL_PROJECT_PROMPT = False

print(f"USE_FULL_PROJECT_PROMPT={USE_FULL_PROJECT_PROMPT}")

# https://platform.openai.com/docs/models/gpt-4
try:
    DEFAULT_MODEL = os.environ["OPENAI_DEFAULT_MODEL"]
except KeyError:
    # we recommend 'gpt-4' if you have access; gpt-3.5 is noticeably worse at generating code.
    # most people don't have gpt-4 access yet, and a hosted version is in the works.
    DEFAULT_MODEL = "gpt-3.5-turbo"

try:
    DEFAULT_MAX_TOKENS = int(os.environ["OPENAI_DEFAULT_MAX_TOKENS"])
except KeyError:
    # tricky to tune: a high cap encourages verbose code, but too low a cap truncates files mid-code.
    DEFAULT_MAX_TOKENS = 2000
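
For reference, a minimal sketch of how these variables might be configured (hypothetical values, not part of this diff); they must be set before constants.py is imported:

import os

# hypothetical values; any unset variable falls back to the defaults above
os.environ["USE_FULL_PROJECT_PROMPT"] = "1"       # "", "0", or "False" leave the flag disabled
os.environ["OPENAI_DEFAULT_MODEL"] = "gpt-4"
os.environ["OPENAI_DEFAULT_MAX_TOKENS"] = "4000"  # "0" omits max_tokens from the API call entirely

import constants
assert constants.USE_FULL_PROJECT_PROMPT is True
assert constants.DEFAULT_MODEL == "gpt-4"
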
38 changes: 29 additions & 9 deletions main.py
@@ -1,8 +1,9 @@
import os
import re
import modal
import ast
from utils import clean_dir
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT

stub = modal.Stub("smol-developer-v1") # yes we are recommending using Modal by default, as it helps with deployment. see readme for why.
openai_image = modal.Image.debian_slim().pip_install("openai", "tiktoken")
@@ -47,10 +48,12 @@ def reportTokens(prompt):
params = {
    "model": model,
    "messages": messages,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "temperature": 0,
}

if DEFAULT_MAX_TOKENS != 0:
    params["max_tokens"] = DEFAULT_MAX_TOKENS

# Send the API request
response = openai.ChatCompletion.create(**params)

@@ -60,17 +63,18 @@ def reportTokens(prompt):


@stub.function()
def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None):
def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None):
# call openai api with this prompt
filecode = generate_response.call(model,
f"""You are an AI developer who is trying to write a program that will generate code for the user based on their intent.

the app is: {prompt}

the files we have decided to generate are: {filepaths_string}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
(f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
f"""
only write valid code for the given filepath and file type, and return only the code.
do not add any other explanation, only return valid code for that file type.
""",
@@ -97,7 +101,7 @@ def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_d
""",
)

return filename, filecode
return filename, get_code_from_string(filecode)


@stub.local_entrypoint()
@@ -163,17 +167,33 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
print(shared_dependencies)
# write shared dependencies as a md file inside the generated directory
write_file("shared_dependencies.md", shared_dependencies, directory)

generated_files_content = ""
# Iterate over generated files and write them to the specified directory
for filename, filecode in generate_file.map(
list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt)
list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt, generated_files_content=generated_files_content)
):
write_file(filename, filecode, directory)
generated_files_content += f"{directory}/{filename}\n"
generated_files_content += "\n"
generated_files_content += filecode
generated_files_content += "\n"


except ValueError:
    print("Failed to parse result")

# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ```
def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    if match:
        return match.group(1)
    else:
        return input_string

def write_file(filename, filecode, directory):
# Output the filename in blue color
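
As a sanity check, here is how the fence-stripping behaves on the kind of output described above (a standalone sketch that re-declares the helper, so it runs outside this repo):

import re

def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    return match.group(1) if match else input_string

wrapped = "# Makefile\n```makefile\nall:\n\techo done\n```"
assert get_code_from_string(wrapped) == "all:\n\techo done"
# responses without a fenced block pass through unchanged
assert get_code_from_string("print('hello')") == "print('hello')"
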
36 changes: 29 additions & 7 deletions main_no_modal.py
@@ -1,9 +1,10 @@
import sys
import os
import re
import ast
from time import sleep
from utils import clean_dir
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT

def generate_response(system_prompt, user_prompt, *args):
import openai
@@ -40,10 +41,12 @@ def reportTokens(prompt):
params = {
    "model": DEFAULT_MODEL,
    "messages": messages,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "temperature": 0,
}

if DEFAULT_MAX_TOKENS != 0:
    params["max_tokens"] = DEFAULT_MAX_TOKENS

# Send the API request
keep_trying = True
while keep_trying:
@@ -62,7 +65,7 @@ def reportTokens(prompt):


def generate_file(
    filename, filepaths_string=None, shared_dependencies=None, prompt=None
    filename, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None
):
# call openai api with this prompt
filecode = generate_response(
@@ -72,8 +75,9 @@ def generate_file(

the files we have decided to generate are: {filepaths_string}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}

the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
(f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
f"""
only write valid code for the given filepath and file type, and return only the code.
do not add any other explanation, only return valid code for that file type.
""",
@@ -100,7 +104,7 @@ def generate_file(
""",
)

return filename, filecode
return filename, get_code_from_string(filecode)


def main(prompt, directory=DEFAULT_DIR, file=None):
@@ -174,19 +178,37 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
print(shared_dependencies)
# write shared dependencies as a md file inside the generated directory
write_file("shared_dependencies.md", shared_dependencies, directory)

generated_files_content = ""
for name in list_actual:
filename, filecode = generate_file(
name,
filepaths_string=filepaths_string,
shared_dependencies=shared_dependencies,
prompt=prompt,
generated_files_content=generated_files_content,
)
write_file(filename, filecode, directory)
generated_files_content += f"{directory}/{filename}\n"
generated_files_content += "\n"
generated_files_content += filecode
generated_files_content += "\n"


except ValueError:
    print("Failed to parse result: " + result)

# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ```
def get_code_from_string(input_string):
    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
    if match:
        return match.group(1)
    else:
        return input_string

def write_file(filename, filecode, directory):
# Output the filename in blue color
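
For illustration, a standalone sketch of what the accumulated context looks like after two files (hypothetical file names and contents, mirroring the four += statements in main()):

directory = "generated"
generated_files_content = ""
for filename, filecode in [("index.html", "<html>hello</html>"), ("app.js", "console.log('hi');")]:
    # per file: path, blank line, code, trailing newline
    generated_files_content += f"{directory}/{filename}\n\n{filecode}\n"

# later prompts then receive:
# generated/index.html
#
# <html>hello</html>
# generated/app.js
#
# console.log('hi');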