Estimate approximate cost and total cost #61

Merged (4 commits) on Sep 26, 2023
161 changes: 115 additions & 46 deletions cli.py
@@ -44,7 +44,9 @@ def process_repository(
model_name: str = DEFAULT_MODEL_NAME,
fuse: bool = False,
debug: bool = False,
is_estimation: bool = True,
) -> None:
cost = 0
folder_tales = {
"repository_name": os.path.basename(os.path.abspath(root_path)),
"folders": [],
@@ -90,7 +92,7 @@ def process_repository(

folder_full_name = os.path.relpath(folder_path, root_path)

folder_readme, folder_tale = process_folder(
folder_readme, folder_tale, folder_cost = process_folder(
folder_path=folder_path,
output_path=os.path.join(output_path, folder_full_name)
if folder_full_name != "."
@@ -99,7 +101,9 @@
fuse=fuse,
debug=debug,
folder_full_name=folder_full_name,
is_estimation=is_estimation,
)
cost += folder_cost

except Exception as e:
folder_name = os.path.basename(folder_path)
@@ -133,9 +137,13 @@ def process_repository(

if folder_tales:
folder_summaries = split_text(str(folder_tales), chunk_size=15000)
root_readme = redact_tale_information(
"root-level", folder_summaries, model_name="gpt-3.5-turbo-16k"
)["text"]
root_readme, call_cost = redact_tale_information(
"root-level",
folder_summaries,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)
cost += call_cost
root_readme = root_readme.replace("----------", "")

# inject folders information
@@ -158,15 +166,18 @@

root_readme = root_readme + modified_original_readme

logger.info("save root json..")
with open(os.path.join(output_path, "root_level.json"), "w") as json_file:
json.dump(folder_tales, json_file, indent=2)
if not is_estimation:
logger.info("save root json..")
with open(os.path.join(output_path, "root_level.json"), "w") as json_file:
json.dump(folder_tales, json_file, indent=2)

logger.info(f"saving root index in {output_path}")
with open(
os.path.join(output_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(root_readme)

logger.info(f"saving root index in {output_path}")
with open(
os.path.join(output_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(root_readme)
return cost


def process_folder(
@@ -176,7 +187,9 @@ def process_folder(
fuse: bool = False,
debug: bool = False,
folder_full_name: str = None,
is_estimation: bool = False,
) -> None:
cost = 0
save_path = os.path.join(output_path, os.path.basename(folder_path))
tales = []

@@ -189,7 +202,10 @@
):
logger.info(f"processing {file_path}")
try:
file_tale = process_file(file_path, save_path, model_name, fuse, debug)
file_tale, file_cost = process_file(
file_path, save_path, model_name, fuse, debug, is_estimation
)
cost += file_cost
except Exception as e:
logger.info(
f"Failed to create dev tale for {file_path} - Exception: {e}"
@@ -245,30 +261,41 @@ def process_folder(
"""
)
logger.debug(f"FILE_TALES: {tales}")
return "-", "-"
return "-", "-", cost

if tales:
files_summaries = split_text(str(tales), chunk_size=10000)
# split into two calls to avoid issues with json decoding markdown text.
folder_readme = redact_tale_information(
"folder-level", files_summaries, model_name="gpt-3.5-turbo-16k"
)["text"]
folder_readme, fl_cost = redact_tale_information(
"folder-level",
files_summaries,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)
folder_readme = folder_readme.replace("----------", "")

folder_overview = redact_tale_information(
"folder-description", folder_readme, model_name="gpt-3.5-turbo-16k"
)["text"]
folder_overview, fd_cost = redact_tale_information(
"folder-description",
folder_readme,
model_name="gpt-3.5-turbo-16k",
is_estimation=is_estimation,
)

cost += fl_cost + fd_cost

logger.info("save folder json..")
with open(os.path.join(save_path, "folder_level.json"), "w") as json_file:
json.dump(tales, json_file, indent=2)
if not is_estimation:
logger.info("save folder json..")
with open(os.path.join(save_path, "folder_level.json"), "w") as json_file:
json.dump(tales, json_file, indent=2)

logger.info(f"saving index in {save_path}")
with open(os.path.join(save_path, "README.md"), "w", encoding="utf-8") as file:
file.write(folder_readme)
logger.info(f"saving index in {save_path}")
with open(
os.path.join(save_path, "README.md"), "w", encoding="utf-8"
) as file:
file.write(folder_readme)

return folder_readme, folder_overview
return None
return folder_readme, folder_overview, cost
return None, None, cost


def process_file(
@@ -277,14 +304,16 @@
model_name: str = DEFAULT_MODEL_NAME,
fuse: bool = False,
debug: bool = False,
is_estimation: bool = False,
) -> None:
cost = 0
file_name = os.path.basename(file_path)
file_ext = os.path.splitext(file_name)[-1]
save_path = os.path.join(output_path, f"{file_name}.json")

if debug:
logger.debug(f"FILE INFO:\nfile_path: {file_path}\nsave_path: {save_path}")
return {"file_docstring": "-"}
return {"file_docstring": "-"}, cost

if not os.path.exists(output_path):
os.makedirs(output_path)
@@ -294,15 +323,15 @@
code = file.read()

if not code:
return {"file_docstring": ""}
return {"file_docstring": ""}, cost

if os.path.exists(save_path):
logger.info(f"Skipping {file_name} as its tale file already exists.")
with open(save_path, "r") as file:
found_tale = json.load(file)
if fuse:
fuse_documentation(code, found_tale, output_path, file_name, file_ext)
return found_tale
return found_tale, cost

if not file_ext or file_ext in ALLOWED_NO_CODE_EXTENSIONS:
# a small single chunk is enough
@@ -311,10 +340,15 @@
"file_name": file_name,
"file_content": no_code_file,
}
file_docstring = redact_tale_information("no-code-file", no_code_file_data)[
"text"
]
return {"file_docstring": file_docstring}
file_docstring, call_cost = redact_tale_information(
content_type="no-code-file",
docs=no_code_file_data,
model_name="text-davinci-003",
is_estimation=is_estimation,
)
cost += call_cost

return {"file_docstring": file_docstring}, cost

logger.info("split dev draft ideas")
big_docs = split_code(code, language=LANGUAGES[file_ext], chunk_size=10000)
@@ -323,7 +357,10 @@
logger.info("extract code elements")
code_elements = []
for idx, doc in enumerate(big_docs):
elements_set = extract_code_elements(doc)
elements_set, call_cost = extract_code_elements(
big_doc=doc, model_name=model_name, is_estimation=is_estimation
)
cost += call_cost
if elements_set:
code_elements.append(elements_set)

@@ -343,9 +380,15 @@
logger.info("create tale sections")
tales_list = []
# process only if we have elements to document
if code_elements_copy:
if code_elements_copy or is_estimation:
for idx, doc in enumerate(short_docs):
tale = get_unit_tale(doc, code_elements_copy, model_name=model_name)
tale, call_cost = get_unit_tale(
short_doc=doc,
code_elements=code_elements_copy,
model_name=model_name,
is_estimation=is_estimation,
)
cost += call_cost
tales_list.append(tale)
logger.info(f"tale section {str(idx+1)}/{len(short_docs)} done.")

@@ -361,20 +404,28 @@
logger.info("add dev tale summary")
summaries = split_text(str(code_elements_dict["summary"]), chunk_size=9000)

file_docstring = redact_tale_information("top-level", summaries)["text"]
file_docstring, call_cost = redact_tale_information(
content_type="top-level",
docs=summaries,
model_name="text-davinci-003",
is_estimation=is_estimation,
)
cost += call_cost

if fuse:
if fuse and not is_estimation:
# add docstring label only to insert it along the docstring into the code
tale["file_docstring"] = DOCSTRING_LABEL + "\n" + file_docstring
fuse_documentation(code, tale, output_path, file_name, file_ext)

tale["file_docstring"] = file_docstring

logger.info(f"save dev tale in: {save_path}")
with open(save_path, "w") as json_file:
json.dump(tale, json_file, indent=2)

return tale
if not is_estimation:
with open(save_path, "w") as json_file:
json.dump(tale, json_file, indent=2)

return tale, cost


def fuse_documentation(code, tale, output_path, file_name, file_ext):
@@ -443,13 +494,22 @@ def fuse_documentation(code, tale, output_path, file_name, file_ext):
default=False,
help="Mock answer and avoid GPT calls",
)
@click.option(
"--estimation",
"is_estimation",
is_flag=True,
default=False,
help="True to calculate an approximate cost of documenting your code without \
doing any GPT call",
)
def main(
path: str,
recursive: bool,
fuse: bool,
output_path: str = DEFAULT_OUTPUT_PATH,
model_name: str = DEFAULT_MODEL_NAME,
debug: bool = False,
is_estimation: bool = False,
):
load_dotenv()

@@ -461,34 +521,43 @@ def main(
if os.path.isdir(path):
if recursive:
logger.info("Processing repository")
process_repository(
price = process_repository(
root_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)
else:
logger.info("Processing folder")
process_folder(
_, price = process_folder(
folder_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)
elif os.path.isfile(path):
logger.info("Processing file")
process_file(
_, price = process_file(
file_path=path,
output_path=output_path,
model_name=model_name,
fuse=fuse,
debug=debug,
is_estimation=is_estimation,
)

else:
raise f"Invalid input path {path}. Path must be a directory or code file."

if is_estimation:
logger.info(f"Approximate cost: {price}")
else:
logger.info(f"Total cost: {price}")


if __name__ == "__main__":
main()
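
For context, here is one hypothetical way to exercise the new flag from the CLI entry point; the module name and the positional/--recursive argument names are assumptions based on main()'s signature, since not all of the @click decorators are visible in this diff:

```python
# Hypothetical usage sketch -- only --estimation is confirmed by this diff.
from click.testing import CliRunner

from cli import main  # assumes the module shown above is importable as `cli`

runner = CliRunner()
# Dry run: walk the repository and log "Approximate cost: ..." without any GPT calls.
result = runner.invoke(main, ["path/to/repo", "--recursive", "--estimation"])
print(result.output)
```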
2 changes: 2 additions & 0 deletions devtale/constants.py
@@ -13,3 +13,5 @@
}

DOCSTRING_LABEL = "@DEVTALE-GENERATED:"

GPT_PRICE = {"gpt-4": 0.03, "gpt-3.5-turbo-16k": 0.03, "text-davinci-003": 0.0015}
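
The matching changes to devtale/utils.py are not visible on this page, so the following is only a sketch of how these per-model prices could feed the per-call costs that cli.py accumulates; the chars-per-token heuristic, the assumption that GPT_PRICE is USD per 1K tokens, and the helper name are guesses rather than the PR's actual implementation:

```python
# Hypothetical sketch -- not the real devtale/utils.py changes, which this page does not show.
from devtale.constants import GPT_PRICE


def estimate_call_cost(prompt: str, model_name: str) -> float:
    """Rough per-call cost, assuming GPT_PRICE holds USD per 1K tokens."""
    approx_tokens = len(prompt) / 4  # crude characters-per-token heuristic
    return (approx_tokens / 1000.0) * GPT_PRICE[model_name]


# Helpers such as redact_tale_information would then return a (text, cost) pair and
# skip the real GPT call when is_estimation is True, which is why every call site in
# cli.py above now unpacks two values and adds the second one into `cost`.
```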