Skip to content

Commit

Permalink
Add additional repo metadata to llm prompts.
Browse files Browse the repository at this point in the history
  • Loading branch information
eli64s committed Oct 23, 2023
1 parent ba5c62f commit 73355bc
Show file tree
Hide file tree
Showing 12 changed files with 320 additions and 225 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ notebooks/
.benchmarks/

# Other
templates/
docs/docs
docs/notes
docs/flow.md
examples/markdown/readme-edgecase.md
readmeai/settings/prompts.toml
readmeai/markdown/data/badges.json
templates/
2 changes: 1 addition & 1 deletion docs/features.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## Key Features
## Features

<br>
<div>
Expand Down
3 changes: 3 additions & 0 deletions docs/overview.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# README-AI

---

## Why README-AI?

---
439 changes: 257 additions & 182 deletions examples/markdown/readme-python.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "readmeai"
version = "0.4.036"
version = "0.4.037"
description = "Generate beautiful README.md files from the terminal, powered by AI."
authors = ["Eli <0x.eli.64s@gmail.com>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion readmeai/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ class PathsConfig(BaseModel):
class PromptsConfig(BaseModel):
"""Pydantic model for OpenAI prompts."""

code_summary: str
features: str
overview: str
slogan: str
summaries: str


class AppConfig(BaseModel):
Expand Down
17 changes: 12 additions & 5 deletions readmeai/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,20 @@ def __init__(self, config: settings.AppConfig):
self.rate_limit_semaphore = asyncio.Semaphore(self.rate_limit)

async def code_to_text(
self, ignore: dict, files: Dict[str, str], prompt: str
self,
files: Dict[str, str],
ignore: Dict[str, List[str]],
prompt: str,
tree: str,
) -> Dict[str, str]:
"""Converts code to natural language text using large language models.
Parameters
----------
ignore : dict
Files, directories, or file extensions to ignore.
files : Dict[str, str]
The repository files to convert to text.
ignore : Dict[str, List[str]]
Files, directories, or file extensions to ignore.
prompt : str
The prompt to use for the OpenAI API calls.
Expand All @@ -88,7 +92,7 @@ async def code_to_text(
self.logger.warning(f"Ignoring file: {path}")
continue

prompt_code = prompt.format(str(path), contents)
prompt_code = prompt.format(tree, str(path), contents)
tasks.append(
asyncio.create_task(
self.generate_text(path, prompt_code, self.tokens)
Expand Down Expand Up @@ -160,7 +164,10 @@ async def generate_text(
try:
token_count = get_token_count(prompt, self.encoding)

if token_count > tokens:
if token_count > self.tokens_max:
self.logger.warning(
f"Truncating tokens: {token_count} > {self.tokens_max}"
)
prompt = truncate_tokens(prompt, tokens)

async with self.rate_limit_semaphore:
Expand Down
2 changes: 1 addition & 1 deletion readmeai/core/tokens.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Utilities for handling tokennization."""
"""Utilities for handling language tokens."""

from tiktoken import encoding_for_model, get_encoding

Expand Down
19 changes: 10 additions & 9 deletions readmeai/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import asyncio
import traceback

import requests

from readmeai.config.settings import (
AppConfig,
AppConfigModel,
Expand All @@ -20,6 +18,7 @@
from readmeai.core import logger, model, preprocess
from readmeai.markdown import headers, tables, tree
from readmeai.services import version_control as vcs
from readmeai.utils import utils

logger = logger.Logger(__name__)

Expand Down Expand Up @@ -80,23 +79,25 @@ async def readme_agent(conf: AppConfig, conf_helper: ConfigHelper) -> None:
parser = preprocess.RepositoryParser(conf, conf_helper)
dependencies, files = parser.get_dependencies(temp_dir)
logger.info(f"Dependencies: {dependencies}")
logger.info(f"Files: {files}")

# Generate codebase file summaries and README.md text via LLMs.
if conf.cli.offline is False:
code_summary = await llm.code_to_text(
conf_helper.ignore_files,
files,
conf.prompts.code_summary,
conf_helper.ignore_files,
conf.prompts.summaries,
tree_str,
)
logger.info(f"Code summaries returned:\n{code_summary[:5]}")
prompts = [
conf.prompts.slogan.format(conf.git.name),
conf.prompts.overview.format(repository, code_summary),
conf.prompts.features.format(repository, tree),
conf.prompts.overview.format(
repository, tree_str, dependencies, code_summary
),
conf.prompts.features.format(
repository, tree_str, dependencies, code_summary
),
]
slogan, overview, features = await llm.chat_to_text(prompts)

else:
conf.md.tables = tables.build_recursive_tables(
repository, temp_dir, placeholder
Expand Down
35 changes: 19 additions & 16 deletions readmeai/settings/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ endpoint = "https://api.openai.com/v1/chat/completions"
encoding = "cl100k_base"
model = "gpt-3.5-turbo"
rate_limit = 3
tokens = 650
tokens_max = 3800
temperature = 0.9
tokens = 750
tokens_max = 4000
temperature = 1.0

# Version Control Systems
[base_urls]
Expand Down Expand Up @@ -38,12 +38,7 @@ output = "readme-ai.md"

# Prompts
[prompts]
code_summary = """Offer a comprehensive summary that encapsulates the core functionalities of the code:
\nPath: {0}\nContents:\n{1}\n Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity.
Limit your response to a maximum of 225 characters (including spaces).
"""
features = """Hello! Analyze the Git codebase {} and create a robust summary of the project's features.
The following information summarizes each file in the repository to help you get started: \n{}\n
features = """Hello! Analyze the repository {0} and follow the instructions below to generate a comprehensive list of features.
Please provide a comprehensive technical analysis of the codebase and its components.
Consider the codebase as a whole and highlight the key characteristics, design patterns, architectural decisions, and any other noteworthy elements.
Generate your response as a Markdown table with the following columns:
Expand All @@ -55,21 +50,29 @@ Generate your response as a Markdown table with the following columns:
| 🔗 | **Dependencies** | Examine the external libraries or other systems that this system relies on here. Limit your response to a maximum of 200 characters.|
| 🧩 | **Modularity** | Discuss the system's organization into smaller, interchangeable components here. Limit your response to a maximum of 200 characters.|
| 🧪 | **Testing** | Evaluate the system's testing strategies and tools here. Limit your response to a maximum of 200 characters. |
| ⚡️ | **Performance** | Analyze how well the system performs, considering speed, efficiency, and resource usage here. Limit your response to a maximum of 200 characters.|
| ⚡️ | **Performance** | Analyze how well the system performs, considering speed, efficiency, and resource usage here. Limit your response to a maximum of 200 characters.|
| 🔐 | **Security** | Assess the measures the system uses to protect data and maintain functionality here. Limit your response to a maximum of 200 characters.|
| 🔀 | **Version Control**| Discuss the system's version control strategies and tools here. Limit your response to a maximum of 200 characters.|
| 🔌 | **Integrations** | Evaluate how the system interacts with other systems and services here. Limit your response to a maximum of 200 characters.|
| 📶 | **Scalability** | Analyze the system's ability to handle growth here. Limit your response to a maximum of 200 characters. |
Thank you for your time and effort!
Repository Details:
\nDirectory Tree: {1}\nDependencies: {2}\nCode Summaries: {3}\n
"""
overview = """Generate a <=100 word summary that describes the capabilities of the repository {0}.
Focus on the project's use-case and value proposition, not its technical details.
Do not refer to the project using the URL provided. Below are more details of the
project so that you can get a deep understanding of the codebase and its components.
Repository Details:
\nDirectory Tree: {1}\nDependencies: {2}\nCode Summaries: {3}\n
"""
overview = """Please analyze the codebase located at {} and provide a robust, yet succinct overview of the rpoject.
The following includes a list of the summaries of the files in the repository: \n{}\n
Craft 3-4 sentences that encapsulate the core functionalities of the project, its purpose, and its value proposition.
slogan = "Conceptualize a catchy and memorable slogan for the GitHub project: {0}. Limit your response to 80 characters."
summaries = """Offer a comprehensive summary <= 80 words that encapsulates the core functionalities of the code below.
Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity.
\nDirectory Tree: {0}\nPath: {1}\nCode:\n{2}\n
"""
slogan = "Conceptualize a catchy and memorable slogan for the GitHub project: {}. Limit your response to 80 characters."

# Markdown Template Code
# Markdown Templates
[md]
tables = ""
default = "► INSERT-TEXT"
Expand Down
1 change: 1 addition & 0 deletions readmeai/settings/ignore_files.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,5 @@ files = [
"__init__.py",
"start",
"test_binary",
"mkdocs.yml",
]
20 changes: 12 additions & 8 deletions readmeai/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@

def should_ignore(conf_helper: ConfigHelper, file_path: Path) -> bool:
"""Filters out files that should be ignored."""
for directory in conf_helper.ignore_files["directories"]:
if directory in file_path.parts:
logger.debug(f"Ignoring directory: {file_path}")
return True
ignore_files = conf_helper.ignore_files

if file_path.name in conf_helper.ignore_files["files"]:
logger.debug(f"Ignoring file: {file_path}")
if any(
directory in file_path.parts
for directory in ignore_files["directories"]
):
logger.debug(f"Ignoring directory: {file_path.name}")
return True

if file_path.suffix[1:] in conf_helper.ignore_files["extensions"]:
logger.debug(f"Ignoring extension: {file_path}")
if file_path.name in ignore_files["files"]:
logger.debug(f"Ignoring file: {file_path.name}")
return True

if file_path.suffix.lstrip(".") in ignore_files["extensions"]:
logger.debug(f"Ignoring extension: {file_path.name}")
return True

return False
Expand Down

0 comments on commit 73355bc

Please sign in to comment.