From 70159458d77e0edd41b4015def519e541444bce0 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Sat, 17 Jun 2023 08:16:42 +0200
Subject: [PATCH 1/8] feat: pass DEFAULT_MODEL and DEFAULT_MAX_TOKENS via
 environment variables

---
 constants.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/constants.py b/constants.py
index b7ccc11b4..cdc8c7230 100644
--- a/constants.py
+++ b/constants.py
@@ -1,4 +1,15 @@
+import os
+
 EXTENSION_TO_SKIP = [".png",".jpg",".jpeg",".gif",".bmp",".svg",".ico",".tif",".tiff"]
 DEFAULT_DIR = "generated"
-DEFAULT_MODEL = "gpt-3.5-turbo" # we recommend 'gpt-4' if you have it # gpt3.5 is going to be worse at generating code so we strongly recommend gpt4. i know most people dont have access, we are working on a hosted version
-DEFAULT_MAX_TOKENS = 2000 # i wonder how to tweak this properly. we dont want it to be max length as it encourages verbosity of code. but too short and code also truncates suddenly.
\ No newline at end of file
+# https://platform.openai.com/docs/models/gpt-4
+try:
+    DEFAULT_MODEL = os.environ["OPENAI_DEFAULT_MODEL"]
+except KeyError:
+    # we recommend 'gpt-4' if you have it; gpt-3.5 is going to be worse at generating code, so we strongly recommend gpt-4. we know most people don't have access yet, and we are working on a hosted version
+    DEFAULT_MODEL = "gpt-3.5-turbo"
+try:
+    DEFAULT_MAX_TOKENS = int(os.environ["OPENAI_DEFAULT_MAX_TOKENS"])
+except KeyError:
+    # it is not obvious how to tune this: we don't want max length, as it encourages verbose code, but too short and code truncates suddenly
+    DEFAULT_MAX_TOKENS = 2000
\ No newline at end of file

From 781fa4664ae5b051c2b130315a93578b2b62df31 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Sat, 17 Jun 2023 09:31:51 +0200
Subject: [PATCH 2/8] feat: USE_FULL_PROJECT_PROMPT + extracting code from ```

USE_FULL_PROJECT_PROMPT: if enabled, each file generation prompt will
include all the files generated before it.

Also, GPT-3.5 sometimes still returns some words around the content of the
file; this change extracts the code from ``` blocks.

Also adds the OPENAI_DEFAULT_MODEL and OPENAI_DEFAULT_MAX_TOKENS env vars.
---
 constants.py     |  9 +++++++++
 main.py          | 32 +++++++++++++++++++++++++-------
 main_no_modal.py | 31 +++++++++++++++++++++++++------
 3 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/constants.py b/constants.py
index cdc8c7230..baa3d9b57 100644
--- a/constants.py
+++ b/constants.py
@@ -2,6 +2,15 @@
 EXTENSION_TO_SKIP = [".png",".jpg",".jpeg",".gif",".bmp",".svg",".ico",".tif",".tiff"]
 DEFAULT_DIR = "generated"
+
+try:
+    USE_FULL_PROJECT_PROMPT = bool(os.environ["USE_FULL_PROJECT_PROMPT"])
+except KeyError:
+    # If enabled, each file generation prompt will include all the files generated before it
+    # It helps to make the code much more consistent
+    # But it requires at least a 16k context model, even for a small project
+    USE_FULL_PROJECT_PROMPT = False
+
 # https://platform.openai.com/docs/models/gpt-4
 try:
     DEFAULT_MODEL = os.environ["OPENAI_DEFAULT_MODEL"]

diff --git a/main.py b/main.py
index 9d7ecd5db..59251e3f2 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,9 @@
 import os
+import re
 import modal
 import ast
 from utils import clean_dir
-from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
+from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT
 
 stub = modal.Stub("smol-developer-v1") # yes we are recommending using Modal by default, as it helps with deployment. see readme for why.
 openai_image = modal.Image.debian_slim().pip_install("openai", "tiktoken")
@@ -60,17 +61,18 @@ def reportTokens(prompt):
 
 @stub.function()
-def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None):
+def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None, generatedFilesContent=None):
     # call openai api with this prompt
     filecode = generate_response.call(model, f"""You are an AI developer who is trying to write a program that will generate code for the user based on their intent.
-    
+
     the app is: {prompt}
 
     the files we have decided to generate are: {filepaths_string}
 
-    the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}
-    
+    the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
+    (f"already generated files are:\n {generatedFilesContent}" if (USE_FULL_PROJECT_PROMPT and generatedFilesContent) else "") +
+    f"""
     only write valid code for the given filepath and file type, and return only the code.
     do not add any other explanation, only return valid code for that file type.
     """,
@@ -97,7 +99,7 @@ def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_d
     """,
     )
 
-    return filename, filecode
+    return filename, get_code_from_string(filecode)
 
 
 @stub.local_entrypoint()
@@ -163,17 +165,33 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
         print(shared_dependencies)
         # write shared dependencies as a md file inside the generated directory
         write_file("shared_dependencies.md", shared_dependencies, directory)
-        
+        generated_files_content = ""
         # Iterate over generated files and write them to the specified directory
         for filename, filecode in generate_file.map(
             list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt)
         ):
             write_file(filename, filecode, directory)
+            generated_files_content += f"{directory}/{filename}\n"
+            generated_files_content += "\n"
+            generated_files_content += filecode
+            generated_files_content += "\n"
     except ValueError:
         print("Failed to parse result")
 
+# sometimes GPT-3.5 still returns some words around the content of the file
+# example:
+# # Makefile
+# ```makefile
+# contents
+# ```
+def get_code_from_string(input_string):
+    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    if match:
+        return match.group(1)
+    else:
+        return input_string
 
 def write_file(filename, filecode, directory):
     # Output the filename in blue color

diff --git a/main_no_modal.py b/main_no_modal.py
index b3edba516..810577c6a 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -1,9 +1,10 @@
 import sys
 import os
+import re
 import ast
 from time import sleep
 from utils import clean_dir
-from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS
+from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT
 
 def generate_response(system_prompt, user_prompt, *args):
     import openai
     import tiktoken
@@ -62,7 +63,7 @@ def reportTokens(prompt):
 
 def generate_file(
-    filename, filepaths_string=None, shared_dependencies=None, prompt=None
+    filename, filepaths_string=None, shared_dependencies=None, prompt=None, generatedFilesContent=None
 ):
     # call openai api with this prompt
     filecode = generate_response(
@@ -72,8 +73,9 @@ def generate_file(
 
     the files we have decided to generate are: {filepaths_string}
 
-    the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}
-    
+    the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
+    (f"already generated files are:\n {generatedFilesContent}" if (USE_FULL_PROJECT_PROMPT and generatedFilesContent) else "") +
+    f"""
     only write valid code for the given filepath and file type, and return only the code.
     do not add any other explanation, only return valid code for that file type.
     """,
@@ -100,7 +102,7 @@ def generate_file(
     """,
     )
 
-    return filename, filecode
+    return filename, get_code_from_string(filecode)
 
 
 def main(prompt, directory=DEFAULT_DIR, file=None):
@@ -174,7 +176,7 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
         print(shared_dependencies)
         # write shared dependencies as a md file inside the generated directory
         write_file("shared_dependencies.md", shared_dependencies, directory)
-        
+        generated_files_content = ""
         for name in list_actual:
             filename, filecode = generate_file(
                 name,
@@ -183,10 +185,27 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
                 prompt=prompt,
             )
             write_file(filename, filecode, directory)
+            generated_files_content += f"{directory}/{filename}\n"
+            generated_files_content += "\n"
+            generated_files_content += filecode
+            generated_files_content += "\n"
+
     except ValueError:
         print("Failed to parse result: " + result)
 
+# sometimes GPT-3.5 still returns some words around the content of the file
+# example:
+# # Makefile
+# ```makefile
+# contents
+# ```
+def get_code_from_string(input_string):
+    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    if match:
+        return match.group(1)
+    else:
+        return input_string
 
 def write_file(filename, filecode, directory):
     # Output the filename in blue color
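A quick sanity check of the get_code_from_string helper introduced above (a self-contained sketch; the sample strings are invented for illustration): with the initial [^\n]+? pattern, the opening fence must carry a language tag, which is exactly the limitation that patches 4 and 5 later relax to [^\n]*?.

import re

def get_code_from_string(input_string):
    # Same regex and fallback as the helper added in patch 2.
    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
    if match:
        return match.group(1)
    else:
        return input_string

# Tagged fence: the surrounding words and the fences are stripped.
wrapped = "# Makefile\n```makefile\nall:\n\techo hi\n```"
assert get_code_from_string(wrapped) == "all:\n\techo hi"

# Bare fence: [^\n]+? needs at least one character before the newline,
# so nothing matches and the whole wrapper comes back unchanged.
bare = "```\nplain contents\n```"
assert get_code_from_string(bare) == bare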
From 141732335d19d1dc159e9b908c78bfced47465e1 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Sat, 17 Jun 2023 11:05:23 +0200
Subject: [PATCH 3/8] fix: actually pass the generated files content to
 generate_file

---
 main.py          | 6 +++---
 main_no_modal.py | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index 59251e3f2..4a41a0b8e 100644
--- a/main.py
+++ b/main.py
@@ -61,7 +61,7 @@ def reportTokens(prompt):
 
 @stub.function()
-def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None, generatedFilesContent=None):
+def generate_file(filename, model=DEFAULT_MODEL, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None):
     # call openai api with this prompt
     filecode = generate_response.call(model, f"""You are an AI developer who is trying to write a program that will generate code for the user based on their intent.
 
     the app is: {prompt}
 
     the files we have decided to generate are: {filepaths_string}
 
     the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
-    (f"already generated files are:\n {generatedFilesContent}" if (USE_FULL_PROJECT_PROMPT and generatedFilesContent) else "") +
+    (f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
     f"""
     only write valid code for the given filepath and file type, and return only the code.
     do not add any other explanation, only return valid code for that file type.
@@ -168,7 +168,7 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
     generated_files_content = ""
     # Iterate over generated files and write them to the specified directory
     for filename, filecode in generate_file.map(
-        list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt)
+        list_actual, order_outputs=False, kwargs=dict(model=model, filepaths_string=filepaths_string, shared_dependencies=shared_dependencies, prompt=prompt, generated_files_content=generated_files_content)
     ):
         write_file(filename, filecode, directory)
         generated_files_content += f"{directory}/{filename}\n"

diff --git a/main_no_modal.py b/main_no_modal.py
index 810577c6a..bea1a30ca 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -63,7 +63,7 @@ def reportTokens(prompt):
 
 def generate_file(
-    filename, filepaths_string=None, shared_dependencies=None, prompt=None, generatedFilesContent=None
+    filename, filepaths_string=None, shared_dependencies=None, prompt=None, generated_files_content=None
 ):
     # call openai api with this prompt
     filecode = generate_response(
@@ -74,7 +74,7 @@ def generate_file(
 
     the files we have decided to generate are: {filepaths_string}
 
     the shared dependencies (like filenames and variable names) we have decided on are: {shared_dependencies}""" +
-    (f"already generated files are:\n {generatedFilesContent}" if (USE_FULL_PROJECT_PROMPT and generatedFilesContent) else "") +
+    (f"already generated files are:\n {generated_files_content}" if (USE_FULL_PROJECT_PROMPT and generated_files_content) else "") +
     f"""
     only write valid code for the given filepath and file type, and return only the code.
     do not add any other explanation, only return valid code for that file type.
@@ -183,6 +183,7 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
             filepaths_string=filepaths_string,
             shared_dependencies=shared_dependencies,
             prompt=prompt,
+            generated_files_content=generated_files_content,
         )
         write_file(filename, filecode, directory)
         generated_files_content += f"{directory}/{filename}\n"
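To make the effect of patch 3 concrete, here is a minimal sketch of the sequential accumulation it wires up in main_no_modal.py (generate_file below is an invented stand-in, not the real API-calling function): every file generated later can see each file generated before it. Note that on the Modal path in main.py, the kwargs dict is built once before generate_file.map dispatches the parallel calls, so there each call still appears to receive the initial empty string.

def generate_file(name, generated_files_content=None):
    # Stand-in: just report what context this file would be generated with.
    context = generated_files_content or "(no files generated yet)"
    return name, f"// code for {name}, generated seeing:\n{context}"

generated_files_content = ""
for name in ["index.html", "style.css", "app.js"]:
    filename, filecode = generate_file(
        name, generated_files_content=generated_files_content
    )
    # Mirrors the accumulation in main_no_modal.py: path, blank line, code.
    generated_files_content += f"generated/{filename}\n\n{filecode}\n"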
From 75f2ea79004bc1b8e5d1aa493acf5df36982d81e Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Mon, 19 Jun 2023 09:37:34 +0300
Subject: [PATCH 4/8] fix(code_extract): should extract even from ``` blocks
 with no language tag

---
 main.py          |  2 +-
 main_no_modal.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index 4a41a0b8e..55f2d5981 100644
--- a/main.py
+++ b/main.py
@@ -187,7 +187,7 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
 # contents
 # ```
 def get_code_from_string(input_string):
-    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
     if match:
         return match.group(1)
     else:
         return input_string

diff --git a/main_no_modal.py b/main_no_modal.py
index bea1a30ca..6ec2b2bd7 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -7,7 +7,7 @@ from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT
 
 def generate_response(system_prompt, user_prompt, *args):
-    import openai
+    import genstudiopy
     import tiktoken
 
     def reportTokens(prompt):
@@ -24,7 +24,7 @@ def reportTokens(prompt):
     )
 
     # Set up your OpenAI API credentials
-    openai.api_key = os.environ["OPENAI_API_KEY"]
+    # genstudiopy.api_key = os.environ["OPENAI_API_KEY"]
 
     messages = []
     messages.append({"role": "system", "content": system_prompt})
@@ -41,7 +41,7 @@ def reportTokens(prompt):
     params = {
         "model": DEFAULT_MODEL,
         "messages": messages,
-        "max_tokens": DEFAULT_MAX_TOKENS,
+        # "max_tokens": DEFAULT_MAX_TOKENS,
         "temperature": 0,
     }
 
@@ -49,7 +49,7 @@ def reportTokens(prompt):
     keep_trying = True
     while keep_trying:
         try:
-            response = openai.ChatCompletion.create(**params)
+            response = genstudiopy.ChatCompletion.create(**params)
             keep_trying = False
         except Exception as e:
             # e.g. 
when the API is too busy, we don't want to fail everything
@@ -202,7 +202,7 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
 # contents
 # ```
 def get_code_from_string(input_string):
-    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
     if match:
         return match.group(1)
     else:
         return input_string

From cd58f9f2e87e25cc16a8c105e2cfb9d062eb4a27 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Mon, 19 Jun 2023 10:31:22 +0300
Subject: [PATCH 5/8] fix(code_extract): should extract even from ``` blocks
 with no language tag

---
 main.py          | 2 +-
 main_no_modal.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 4a41a0b8e..55f2d5981 100644
--- a/main.py
+++ b/main.py
@@ -187,7 +187,7 @@ def main(prompt, directory=DEFAULT_DIR, model=DEFAULT_MODEL, file=None):
 # contents
 # ```
 def get_code_from_string(input_string):
-    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
     if match:
         return match.group(1)
     else:
         return input_string

diff --git a/main_no_modal.py b/main_no_modal.py
index bea1a30ca..d75db5026 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -202,7 +202,7 @@ def main(prompt, directory=DEFAULT_DIR, file=None):
 # contents
 # ```
 def get_code_from_string(input_string):
-    match = re.search(r'```[^\n]+?\n([\s\S]+?)\n```', input_string)
+    match = re.search(r'```[^\n]*?\n([\s\S]+?)\n```', input_string)
     if match:
         return match.group(1)
     else:
         return input_string

From 42e477b0c1bddd5cc4d3bafa0638e34035e7fac4 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Wed, 21 Jun 2023 16:00:49 +0300
Subject: [PATCH 6/8] fix: if DEFAULT_MAX_TOKENS is 0, skip this field

---
 main.py          | 4 +++-
 main_no_modal.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 55f2d5981..61f50882c 100644
--- a/main.py
+++ b/main.py
@@ -48,10 +48,12 @@ def reportTokens(prompt):
     params = {
         "model": model,
         "messages": messages,
-        "max_tokens": DEFAULT_MAX_TOKENS,
         "temperature": 0,
     }
 
+    if DEFAULT_MAX_TOKENS != 0:
+        params["max_tokens"] = DEFAULT_MAX_TOKENS
+
     # Send the API request
     response = openai.ChatCompletion.create(**params)

diff --git a/main_no_modal.py b/main_no_modal.py
index d75db5026..15df37e53 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -41,10 +41,12 @@ def reportTokens(prompt):
     params = {
         "model": DEFAULT_MODEL,
         "messages": messages,
-        "max_tokens": DEFAULT_MAX_TOKENS,
         "temperature": 0,
     }
 
+    if DEFAULT_MAX_TOKENS != 0:
+        params["max_tokens"] = DEFAULT_MAX_TOKENS
+
     # Send the API request
     keep_trying = True
     while keep_trying:

From 952060f1ffb2dffce1a9050124f4f4a60e390e91 Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Wed, 21 Jun 2023 17:33:14 +0300
Subject: [PATCH 7/8] fix: revert unnecessary changes

---
 main_no_modal.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/main_no_modal.py b/main_no_modal.py
index c9492a6f8..15df37e53 100644
--- a/main_no_modal.py
+++ b/main_no_modal.py
@@ -7,7 +7,7 @@ from constants import DEFAULT_DIR, DEFAULT_MODEL, DEFAULT_MAX_TOKENS, USE_FULL_PROJECT_PROMPT
 
 def generate_response(system_prompt, user_prompt, *args):
-    import genstudiopy
+    import openai
     import tiktoken
 
     def reportTokens(prompt):
@@ -24,7 +24,7 @@ def reportTokens(prompt):
     )
 
     # Set up your OpenAI API credentials
-    # genstudiopy.api_key = os.environ["OPENAI_API_KEY"]
+    openai.api_key = os.environ["OPENAI_API_KEY"]
 
     messages = []
     messages.append({"role": "system", "content": system_prompt})
@@ -51,7 +51,7 @@ def reportTokens(prompt):
     keep_trying = True
     while keep_trying:
         try:
-            response = genstudiopy.ChatCompletion.create(**params)
+            response = openai.ChatCompletion.create(**params)
             keep_trying = False
         except Exception as e:
             # e.g. when the API is too busy, we don't want to fail everything
From 22dcf975d2cdc8fa00f7e6b923fe1b6a72fe3ccf Mon Sep 17 00:00:00 2001
From: Alexander Gusman
Date: Thu, 22 Jun 2023 13:19:04 +0300
Subject: [PATCH 8/8] fix(USE_FULL_PROJECT_PROMPT): "0" or "False" should
 also count as false

---
 constants.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/constants.py b/constants.py
index baa3d9b57..2717255d9 100644
--- a/constants.py
+++ b/constants.py
@@ -4,13 +4,15 @@ DEFAULT_DIR = "generated"
 
 try:
-    USE_FULL_PROJECT_PROMPT = bool(os.environ["USE_FULL_PROJECT_PROMPT"])
+    USE_FULL_PROJECT_PROMPT = bool(os.environ["USE_FULL_PROJECT_PROMPT"]) and os.environ["USE_FULL_PROJECT_PROMPT"] != 'False' and os.environ["USE_FULL_PROJECT_PROMPT"] != "0"
 except KeyError:
     # If enabled, each file generation prompt will include all the files generated before it
     # It helps to make the code much more consistent
     # But it requires at least a 16k context model, even for a small project
     USE_FULL_PROJECT_PROMPT = False
 
+print(USE_FULL_PROJECT_PROMPT)
+
 # https://platform.openai.com/docs/models/gpt-4
 try:
     DEFAULT_MODEL = os.environ["OPENAI_DEFAULT_MODEL"]
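A closing observation on the flag parsing that patch 8 arrives at: it grows one condition per falsy spelling. A possible consolidation with the same semantics is sketched below (env_flag is a hypothetical helper, not part of this series): unset falls back to the default, and "", "0", and "False" all disable the flag.

import os

def env_flag(name: str, default: bool = False) -> bool:
    # Unset -> default; "", "0", "False" -> False; anything else -> True.
    value = os.environ.get(name)
    if value is None:
        return default
    return value not in ("", "0", "False")

USE_FULL_PROJECT_PROMPT = env_flag("USE_FULL_PROJECT_PROMPT")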