diff --git a/README.md b/README.md index ba69b0a9..9ed8318b 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ You can also replace `hub` with `ingest` in any GitHub URL to access the corresp ## 📚 Requirements -- Python 3.7+ +- Python 3.8+ - For private repositories: A GitHub Personal Access Token (PAT). You can generate one at [https://github.com/settings/personal-access-tokens](https://github.com/settings/personal-access-tokens) (Profile → Settings → Developer Settings → Personal Access Tokens → Fine-grained Tokens) ### 📦 Installation diff --git a/pyproject.toml b/pyproject.toml index f280d4a4..f6d39290 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ "python-dotenv", "slowapi", "starlette>=0.40.0", # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw - "tiktoken", + "tiktoken>=0.7.0", # Support for o200k_base encoding "tomli", "typing_extensions; python_version < '3.10'", "uvicorn>=0.11.7", # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150 @@ -23,7 +23,6 @@ classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/requirements.txt b/requirements.txt index 5f8657ed..aa8ff03b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,6 @@ pydantic python-dotenv slowapi starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw -tiktoken +tiktoken>=0.7.0 # Support for o200k_base encoding tomli uvicorn>=0.11.7 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150 diff --git a/src/gitingest/output_formatters.py b/src/gitingest/output_formatters.py index 5bacba22..9ca3d474 100644 --- a/src/gitingest/output_formatters.py +++ b/src/gitingest/output_formatters.py @@ -171,7 +171,7 @@ def _format_token_count(text: str) -> Optional[str]: The formatted number of tokens as a string (e.g., '1.2k', '1.2M'), or `None` if an error occurs. """ try: - encoding = tiktoken.get_encoding("cl100k_base") + encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini total_tokens = len(encoding.encode(text, disallowed_special=())) except (ValueError, UnicodeEncodeError) as exc: print(exc)