Estimate approximate cost and total cost #61

Merged (4 commits) on Sep 26, 2023
161 changes: 115 additions & 46 deletions cli.py
@@ -44,7 +44,9 @@ def process_repository(
model_name: str = DEFAULT_MODEL_NAME,
fuse: bool = False,
debug: bool = False,
is_estimation: bool = True,
) -> None:
cost = 0
folder_tales = {
"repository_name": os.path.basename(os.path.abspath(root_path)),
"folders": [],
@@ -90,7 +92,7 @@ def process_repository(

folder_full_name = os.path.relpath(folder_path, root_path)

folder_readme, folder_tale = process_folder(
folder_readme, folder_tale, folder_cost = process_folder(
folder_path=folder_path,
output_path=os.path.join(output_path, folder_full_name)
if folder_full_name != "."
@@ -99,7 +101,9 @@
fuse=fuse,
debug=debug,
folder_full_name=folder_full_name,
is_estimation=is_estimation,
)
cost += folder_cost

except Exception as e:
folder_name = os.path.basename(folder_path)
@@ -133,9 +137,13 @@ def process_repository(

if folder_tales:
folder_summaries = split_text(str(folder_tales), chunk_size=15000)
root_readme = redact_tale_information(
"root-level", folder_summaries, model_name="gpt-3.5-turbo-16k"
)["text"]
root_readme, call_cost = redact_tale_information(
"root-level",
folder_summaries,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)
cost += call_cost
root_readme = root_readme.replace("----------", "")

# inject folders information
@@ -158,15 +166,18 @@

root_readme = root_readme + modified_original_readme

logger.info("save root json..")
with open(os.path.join(output_path, "root_level.json"), "w") as json_file:
json.dump(folder_tales, json_file, indent=2)
if not is_estimation:
logger.info("save root json..")
with open(os.path.join(output_path, "root_level.json"), "w") as json_file:
json.dump(folder_tales, json_file, indent=2)

logger.info(f"saving root index in {output_path}")
with open(
os.path.join(output_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(root_readme)

logger.info(f"saving root index in {output_path}")
with open(
os.path.join(output_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(root_readme)
return cost


def process_folder(
@@ -176,7 +187,9 @@ def process_folder(
fuse: bool = False,
debug: bool = False,
folder_full_name: str = None,
is_estimation: bool = False,
) -> None:
cost = 0
save_path = os.path.join(output_path, os.path.basename(folder_path))
tales = []

@@ -189,7 +202,10 @@
):
logger.info(f"processing {file_path}")
try:
file_tale = process_file(file_path, save_path, model_name, fuse, debug)
file_tale, file_cost = process_file(
file_path, save_path, model_name, fuse, debug, is_estimation
)
cost += file_cost
except Exception as e:
logger.info(
f"Failed to create dev tale for {file_path} - Exception: {e}"
@@ -245,30 +261,41 @@ def process_folder(
"""
)
logger.debug(f"FILE_TALES: {tales}")
return "-", "-"
return "-", "-", cost

if tales:
files_summaries = split_text(str(tales), chunk_size=10000)
# split into two calls to avoid issues with json decoding markdown text.
folder_readme = redact_tale_information(
"folder-level", files_summaries, model_name="gpt-3.5-turbo-16k"
)["text"]
folder_readme, fl_cost = redact_tale_information(
"folder-level",
files_summaries,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)
folder_readme = folder_readme.replace("----------", "")

folder_overview = redact_tale_information(
"folder-description", folder_readme, model_name="gpt-3.5-turbo-16k"
)["text"]
folder_overview, fd_cost = redact_tale_information(
"folder-description",
folder_readme,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)

cost += fl_cost + fd_cost

logger.info("save folder json..")
with open(os.path.join(save_path, "folder_level.json"), "w") as json_file:
json.dump(tales, json_file, indent=2)
if not is_estimation:
logger.info("save folder json..")
with open(os.path.join(save_path, "folder_level.json"), "w") as json_file:
json.dump(tales, json_file, indent=2)

logger.info(f"saving index in {save_path}")
with open(os.path.join(save_path, "README.md"), "w", encoding="utf-8") as file:
file.write(folder_readme)
logger.info(f"saving index in {save_path}")
with open(
os.path.join(save_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(folder_readme)

return folder_readme, folder_overview
return None
return folder_readme, folder_overview, cost
return None, None, cost


def process_file(
@@ -277,14 +304,16 @@
model_name: str = DEFAULT_MODEL_NAME,
fuse: bool = False,
debug: bool = False,
is_estimation: bool = False,
) -> None:
cost = 0
file_name = os.path.basename(file_path)
file_ext = os.path.splitext(file_name)[-1]
save_path = os.path.join(output_path, f"{file_name}.json")

if debug:
logger.debug(f"FILE INFO:\nfile_path: {file_path}\nsave_path: {save_path}")
return {"file_docstring": "-"}
return {"file_docstring": "-"}, cost

if not os.path.exists(output_path):
os.makedirs(output_path)
@@ -294,15 +323,15 @@
code = file.read()

if not code:
return {"file_docstring": ""}
return {"file_docstring": ""}, cost

if os.path.exists(save_path):
logger.info(f"Skipping {file_name} as its tale file already exists.")
with open(save_path, "r") as file:
found_tale = json.load(file)
if fuse:
fuse_documentation(code, found_tale, output_path, file_name, file_ext)
return found_tale
return found_tale, cost

if not file_ext or file_ext in ALLOWED_NO_CODE_EXTENSIONS:
# a small single chunk is enough
@@ -311,10 +340,15 @@
"file_name": file_name,
"file_content": no_code_file,
}
file_docstring = redact_tale_information("no-code-file", no_code_file_data)[
"text"
]
return {"file_docstring": file_docstring}
file_docstring, call_cost = redact_tale_information(
content_type="no-code-file",
docs=no_code_file_data,
model_name="text-davinci-003",
is_estimation=is_estimation,
)
cost += call_cost

return {"file_docstring": file_docstring}, cost

logger.info("split dev draft ideas")
big_docs = split_code(code, language=LANGUAGES[file_ext], chunk_size=10000)
@@ -323,7 +357,10 @@
logger.info("extract code elements")
code_elements = []
for idx, doc in enumerate(big_docs):
elements_set = extract_code_elements(doc)
elements_set, call_cost = extract_code_elements(
big_doc=doc, model_name=model_name, is_estimation=is_estimation
)
cost += call_cost
if elements_set:
code_elements.append(elements_set)

@@ -343,9 +380,15 @@
logger.info("create tale sections")
tales_list = []
# process only if we have elements to document
if code_elements_copy:
if code_elements_copy or is_estimation:
for idx, doc in enumerate(short_docs):
tale = get_unit_tale(doc, code_elements_copy, model_name=model_name)
tale, call_cost = get_unit_tale(
short_doc=doc,
code_elements=code_elements_copy,
model_name=model_name,
is_estimation=is_estimation,
)
cost += call_cost
tales_list.append(tale)
logger.info(f"tale section {str(idx+1)}/{len(short_docs)} done.")

@@ -361,20 +404,28 @@
logger.info("add dev tale summary")
summaries = split_text(str(code_elements_dict["summary"]), chunk_size=9000)

file_docstring = redact_tale_information("top-level", summaries)["text"]
file_docstring, call_cost = redact_tale_information(
content_type="top-level",
docs=summaries,
model_name="text-davinci-003",
is_estimation=is_estimation,
)
cost += call_cost

if fuse:
if fuse and not is_estimation:
# add docstring label only to insert it along the docstring into the code
tale["file_docstring"] = DOCSTRING_LABEL + "\n" + file_docstring
fuse_documentation(code, tale, output_path, file_name, file_ext)

tale["file_docstring"] = file_docstring

logger.info(f"save dev tale in: {save_path}")
with open(save_path, "w") as json_file:
json.dump(tale, json_file, indent=2)

return tale
if not is_estimation:
with open(save_path, "w") as json_file:
json.dump(tale, json_file, indent=2)

return tale, cost


def fuse_documentation(code, tale, output_path, file_name, file_ext):
@@ -443,13 +494,22 @@ def fuse_documentation(code, tale, output_path, file_name, file_ext):
default=False,
help="Mock answer and avoid GPT calls",
)
@click.option(
"--estimation",
"is_estimation",
is_flag=True,
default=False,
help="True to calculate an approximate cost of documenting your code without \
doing any GPT call",
)
def main(
path: str,
recursive: bool,
fuse: bool,
output_path: str = DEFAULT_OUTPUT_PATH,
model_name: str = DEFAULT_MODEL_NAME,
debug: bool = False,
is_estimation: bool = False,
):
load_dotenv()

@@ -461,34 +521,43 @@ def main(
if os.path.isdir(path):
if recursive:
logger.info("Processing repository")
process_repository(
price = process_repository(
root_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)
else:
logger.info("Processing folder")
process_folder(
_, price = process_folder(
folder_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)
elif os.path.isfile(path):
logger.info("Processing file")
process_file(
_, price = process_file(
file_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)

else:
raise f"Invalid input path {path}. Path must be a directory or code file."

if is_estimation:
logger.info(f"Approximate cost: {price}")
else:
logger.info(f"Total cost: {price}")


if __name__ == "__main__":
main()
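
For context, here is one hypothetical way to exercise the new flag from the CLI entry point; the module name and the positional/--recursive argument names are assumptions based on main()'s signature, since not all of the @click decorators are visible in this diff:

```python
# Hypothetical usage sketch -- only --estimation is confirmed by this diff.
from click.testing import CliRunner

from cli import main  # assumes the module shown above is importable as `cli`

runner = CliRunner()
# Dry run: walk the repository and log "Approximate cost: ..." without any GPT calls.
result = runner.invoke(main, ["path/to/repo", "--recursive", "--estimation"])
print(result.output)
```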
2 changes: 2 additions & 0 deletions devtale/constants.py
@@ -13,3 +13,5 @@
}

DOCSTRING_LABEL = "@DEVTALE-GENERATED:"

GPT_PRICE = {"gpt-4": 0.03, "gpt-3.5-turbo-16k": 0.03, "text-davinci-003": 0.0015}
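
The matching changes to devtale/utils.py are not visible on this page, so the following is only a sketch of how these per-model prices could feed the per-call costs that cli.py accumulates; the chars-per-token heuristic, the assumption that GPT_PRICE is USD per 1K tokens, and the helper name are guesses rather than the PR's actual implementation:

```python
# Hypothetical sketch -- not the real devtale/utils.py changes, which this page does not show.
from devtale.constants import GPT_PRICE


def estimate_call_cost(prompt: str, model_name: str) -> float:
    """Rough per-call cost, assuming GPT_PRICE holds USD per 1K tokens."""
    approx_tokens = len(prompt) / 4  # crude characters-per-token heuristic
    return (approx_tokens / 1000.0) * GPT_PRICE[model_name]


# Helpers such as redact_tale_information would then return a (text, cost) pair and
# skip the real GPT call when is_estimation is True, which is why every call site in
# cli.py above now unpacks two values and adds the second one into `cost`.
```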