diff --git a/.env.sample b/.env.sample
index d4eb325a..e0f6f9db 100644
--- a/.env.sample
+++ b/.env.sample
@@ -1 +1,5 @@
-HUGGINGFACE_API_TOKEN=""
\ No newline at end of file
+HUGGINGFACE_API_TOKEN=""
+OMP_NUM_THREADS=8
+MKL_NUM_THREADS=8
+NUMEXPR_NUM_THREADS=8
+OPENBLAS_NUM_THREADS=8
\ No newline at end of file
diff --git a/README.md b/README.md
index d8817bd1..1ee3fa90 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,17 @@
 
 Copy the `.env.sample` to `.env` and replace the value of the `HUGGINGFACE_API_TOKEN` with the appropriate value. The token is required to download Llama 3.2 1B.
 
+For development environments:
 ```shell
-docker compose up --build web
+docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build nilai
+```
+
+For production environments:
+```shell
+docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
+```
+
+To run the server directly, outside Docker:
+```shell
+uv run gunicorn -c gunicorn.conf.py nilai.__main__:app
 ```
\ No newline at end of file
diff --git a/caddy/.gitignore b/caddy/.gitignore
new file mode 100644
index 00000000..fbc7f1cd
--- /dev/null
+++ b/caddy/.gitignore
@@ -0,0 +1,2 @@
+caddy_config/
+caddy_data/
\ No newline at end of file
diff --git a/caddy/Caddyfile b/caddy/Caddyfile
new file mode 100644
index 00000000..7bdb0c8a
--- /dev/null
+++ b/caddy/Caddyfile
@@ -0,0 +1,10 @@
+(ssl_config) {
+	tls {
+		protocols tls1.2 tls1.3
+	}
+}
+
+https://nilai.sandbox.nilogy.xyz {
+	import ssl_config
+	reverse_proxy nilai:8443
+}
\ No newline at end of file
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
new file mode 100644
index 00000000..e20e75ac
--- /dev/null
+++ b/docker-compose.dev.yml
@@ -0,0 +1,4 @@
+services:
+  nilai:
+    ports:
+      - "8080:8080"
\ No newline at end of file
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
new file mode 100644
index 00000000..774676a6
--- /dev/null
+++ b/docker-compose.prod.yml
@@ -0,0 +1,21 @@
+services:
+  nilai:
+    networks:
+      - proxy_net
+  caddy:
+    image: caddy:latest
+    container_name: caddy
+    restart: unless-stopped
+    networks:
+      - proxy_net
+    ports:
+      - "80:80"
+      - "443:443"
+      - "443:443/udp"
+    volumes:
+      - ./caddy/Caddyfile:/etc/caddy/Caddyfile
+      - ./caddy/caddy_data:/data
+      - ./caddy/caddy_config:/config
+
+networks:
+  proxy_net:
\ No newline at end of file
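A note on the compose split: the base file no longer publishes ports; `docker-compose.dev.yml` exposes the app directly on 8080, while `docker-compose.prod.yml` keeps nilai on an internal network behind Caddy. A quick way to verify how an overlay merges with the base file before bringing anything up is to render the combined config — a minimal sketch, assuming only the files in this diff and a working `docker compose`:

```python
import subprocess

# Render the merged dev configuration without starting containers.
merged = subprocess.run(
    [
        "docker", "compose",
        "-f", "docker-compose.yml",
        "-f", "docker-compose.dev.yml",
        "config",
    ],
    capture_output=True,
    text=True,
    check=True,
)
print(merged.stdout)  # expect nilai to publish 8080 and no caddy service
```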
diff --git a/docker-compose.yml b/docker-compose.yml
index 24f88723..8cc45612 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,11 +3,8 @@ services:
     build:
       context: .
       dockerfile: docker/Dockerfile
-    ports:
-      - "12345:12345"
     volumes:
       - ${PWD}/db/:/app/db/ # sqlite database for users
       - hugging_face_models:/root/.cache/huggingface # cache models
-
 volumes:
   hugging_face_models:
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 24c2076a..0b59665f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,16 +1,18 @@
 FROM python:3.12-slim
 
 COPY --link nilai /app/nilai
-COPY pyproject.toml uv.lock .env /app/
+COPY pyproject.toml uv.lock .env gunicorn.conf.py /app/
 
 WORKDIR /app
 
-RUN pip install uv
-RUN uv sync
+RUN apt-get update && \
+    apt-get install build-essential certbot -y && \
+    apt-get clean && \
+    apt-get autoremove && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install uv && \
+    uv sync
 
-EXPOSE 12345
+EXPOSE 8080 8443
 
-# ENTRYPOINT ["uv", "run", "fastapi", "run", "nilai/main.py"]
-# CMD ["--host", "0.0.0.0", "--port", "12345"]
-
-CMD ["uv", "run", "fastapi", "run", "nilai/main.py", "--host", "0.0.0.0", "--port", "12345"]
\ No newline at end of file
+CMD ["uv", "run", "gunicorn", "-c", "gunicorn.conf.py", "nilai.__main__:app"]
\ No newline at end of file
diff --git a/docker/README.md b/docker/README.md
index e82afc29..b2d13475 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -4,7 +4,8 @@
 docker build -t nillion/nilai:latest -f docker/Dockerfile .
 
 docker run \
-  -p 12345:12345 \
+  -p 8080:8080 \
+  -p 8443:8443 \
   -v hugging_face_models:/root/.cache/huggingface \
   -v $(pwd)/users.sqlite:/app/users.sqlite \
   nillion/nilai:latest
diff --git a/gunicorn.conf.py b/gunicorn.conf.py
new file mode 100644
index 00000000..bcd2cb93
--- /dev/null
+++ b/gunicorn.conf.py
@@ -0,0 +1,16 @@
+# gunicorn.conf.py
+
+# Bind both application ports (TLS is terminated by Caddy in production)
+bind = ["0.0.0.0:8080", "0.0.0.0:8443"]
+
+# Number of worker processes
+workers = 2
+
+# Threads per worker
+threads = 16
+
+# Worker timeout in seconds
+timeout = 120
+
+# UvicornWorker serves the FastAPI app as ASGI
+worker_class = "uvicorn.workers.UvicornWorker"
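On the gunicorn settings: each worker is a separate process, so two workers load two copies of the GGUF model; also, gunicorn's `threads` setting only affects the gthread worker type, so it likely has no effect under `UvicornWorker`. A hypothetical variant of the config that sizes workers from the host (a sketch, not the file added above):

```python
# A hypothetical gunicorn.conf.py variant (not the file in this diff).
import multiprocessing

bind = ["0.0.0.0:8080", "0.0.0.0:8443"]

# Heuristic: one worker per 8 cores, at least 2; every worker holds its
# own copy of the model in memory, so scale with care.
workers = max(2, multiprocessing.cpu_count() // 8)

timeout = 120
worker_class = "uvicorn.workers.UvicornWorker"
```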
diff --git a/nilai/__main__.py b/nilai/__main__.py
new file mode 100644
index 00000000..17023698
--- /dev/null
+++ b/nilai/__main__.py
@@ -0,0 +1,21 @@
+import uvicorn
+
+from nilai.app import app  # re-exported so gunicorn can target nilai.__main__:app
+
+
+def run_uvicorn():
+    """
+    Run the app with Uvicorn for debugging.
+    """
+    uvicorn.run(
+        "nilai.app:app",  # an import string is required for reload to take effect
+        host="0.0.0.0",  # Listen on all interfaces
+        port=8080,
+        reload=True,  # Enable auto-reload for development
+        # ssl_certfile=SSL_CERTFILE,
+        # ssl_keyfile=SSL_KEYFILE,
+    )
+
+
+if __name__ == "__main__":
+    run_uvicorn()
diff --git a/nilai/main.py b/nilai/app.py
similarity index 69%
rename from nilai/main.py
rename to nilai/app.py
index 08cd5d9d..5e0af62e 100644
--- a/nilai/main.py
+++ b/nilai/app.py
@@ -35,24 +35,13 @@
             "name": "Model",
             "description": "Model information",
         },
+        {
+            "name": "Usage",
+            "description": "User token usage",
+        },
     ],
 )
 
 app.include_router(public.router)
 app.include_router(private.router, dependencies=[Depends(get_user)])
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    # Path to your SSL certificate and key files
-    # SSL_CERTFILE = "/path/to/certificate.pem"  # Replace with your certificate file path
-    # SSL_KEYFILE = "/path/to/private-key.pem"  # Replace with your private key file path
-
-    uvicorn.run(
-        app,
-        host="0.0.0.0",  # Listen on all interfaces
-        port=12345,  # Use port 8443 for HTTPS
-        # ssl_certfile=SSL_CERTFILE,
-        # ssl_keyfile=SSL_KEYFILE,
-    )
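With the server block gone, `nilai.app` is now a plain importable module, so the app can be exercised in-process. A sketch using FastAPI's `TestClient` with the API-key dependency stubbed out — note that importing `nilai.app` pulls in `nilai.state`, which downloads and loads the model:

```python
from fastapi.testclient import TestClient

from nilai.app import app  # importing this loads the model via nilai.state
from nilai.auth import get_user

# Replace API-key auth with a stub user (the userid here is made up).
app.dependency_overrides[get_user] = lambda: {
    "name": "test",
    "userid": "00000000-0000-0000-0000-000000000000",
}

client = TestClient(app)
print(client.get("/v1/models").json())
```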
diff --git a/nilai/db.py b/nilai/db.py
index 49f54441..f39d6e12 100644
--- a/nilai/db.py
+++ b/nilai/db.py
@@ -53,8 +53,8 @@ class User(Base):
     userid = Column(String(36), primary_key=True, index=True)
     name = Column(String(100), nullable=False)
     apikey = Column(String(36), unique=True, nullable=False, index=True)
-    input_tokens = Column(Integer, default=0, nullable=False)
-    generated_tokens = Column(Integer, default=0, nullable=False)
+    prompt_tokens = Column(Integer, default=0, nullable=False)
+    completion_tokens = Column(Integer, default=0, nullable=False)
 
     def __repr__(self):
         return f"<User(name={self.name}, userid={self.userid})>"
@@ -146,7 +146,7 @@ def insert_user(name: str) -> Dict[str, str]:
             raise
 
     @staticmethod
-    def check_api_key(api_key: str) -> Optional[str]:
+    def check_api_key(api_key: str) -> Optional[dict]:
         """
         Validate an API key.
 
@@ -159,33 +159,55 @@
         try:
             with get_db_session() as session:
                 user = session.query(User).filter(User.apikey == api_key).first()
-                return user.name if user else None  # type: ignore
+                return {"name": user.name, "userid": user.userid} if user else None  # type: ignore
         except SQLAlchemyError as e:
             logger.error(f"Error checking API key: {e}")
             return None
 
     @staticmethod
-    def update_token_usage(userid: str, input_tokens: int, generated_tokens: int):
+    def update_token_usage(userid: str, prompt_tokens: int, completion_tokens: int):
         """
         Update token usage for a specific user.
 
         Args:
             userid (str): User's unique ID
-            input_tokens (int): Number of input tokens
-            generated_tokens (int): Number of generated tokens
+            prompt_tokens (int): Number of prompt tokens consumed
+            completion_tokens (int): Number of completion tokens generated
         """
         try:
             with get_db_session() as session:
                 user = session.query(User).filter(User.userid == userid).first()
                 if user:
-                    user.input_tokens += input_tokens  # type: ignore
-                    user.generated_tokens += generated_tokens  # type: ignore
+                    user.prompt_tokens += prompt_tokens  # type: ignore
+                    user.completion_tokens += completion_tokens  # type: ignore
                     logger.info(f"Updated token usage for user {userid}")
                 else:
                     logger.warning(f"User {userid} not found")
         except SQLAlchemyError as e:
             logger.error(f"Error updating token usage: {e}")
 
+    @staticmethod
+    def get_token_usage(userid: str) -> Dict[str, Any] | None:
+        """
+        Get token usage for a specific user.
+
+        Args:
+            userid (str): User's unique ID
+        """
+        try:
+            with get_db_session() as session:
+                user = session.query(User).filter(User.userid == userid).first()
+                if user:
+                    return {
+                        "prompt_tokens": user.prompt_tokens,
+                        "completion_tokens": user.completion_tokens,
+                        "total_tokens": user.prompt_tokens + user.completion_tokens,
+                    }
+                else:
+                    logger.warning(f"User {userid} not found")
+        except SQLAlchemyError as e:
+            logger.error(f"Error retrieving token usage: {e}")
+
     @staticmethod
     def get_all_users() -> Optional[List[UserData]]:
         """
@@ -202,8 +228,8 @@ def get_all_users() -> Optional[List[UserData]]:
                     userid=user.userid,  # type: ignore
                     name=user.name,  # type: ignore
                     apikey=user.apikey,  # type: ignore
-                    input_tokens=user.input_tokens,  # type: ignore
-                    generated_tokens=user.generated_tokens,  # type: ignore
+                    input_tokens=user.prompt_tokens,  # type: ignore
+                    generated_tokens=user.completion_tokens,  # type: ignore
                 )
                 for user in users
             ]
@@ -227,8 +253,8 @@ def get_user_token_usage(userid: str) -> Optional[Dict[str, int]]:
                 user = session.query(User).filter(User.userid == userid).first()
                 if user:
                     return {
-                        "input_tokens": user.input_tokens,
-                        "generated_tokens": user.generated_tokens,
+                        "prompt_tokens": user.prompt_tokens,
+                        "completion_tokens": user.completion_tokens,
                     }  # type: ignore
                 return None
         except SQLAlchemyError as e:
@@ -255,6 +281,8 @@ def get_user_token_usage(userid: str) -> Optional[Dict[str, int]]:
     print(f"API key validation: {user_name}")
 
     # Update and retrieve token usage
-    UserManager.update_token_usage(bob["userid"], input_tokens=50, generated_tokens=20)
+    UserManager.update_token_usage(
+        bob["userid"], prompt_tokens=50, completion_tokens=20
+    )
     usage = UserManager.get_user_token_usage(bob["userid"])
     print(f"Bob's token usage: {usage}")
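The rename from `input_tokens`/`generated_tokens` to the OpenAI-style `prompt_tokens`/`completion_tokens` is not applied to existing SQLite files — SQLAlchemy's table creation will not rename columns in place. Something like the following one-off migration would be needed; the table name and database path are assumptions (check `User.__tablename__` and the compose volume mount):

```python
import sqlite3

# Hypothetical one-off migration; requires SQLite >= 3.25 for RENAME COLUMN.
conn = sqlite3.connect("db/users.sqlite")  # path assumed from the compose volume
with conn:
    conn.execute("ALTER TABLE users RENAME COLUMN input_tokens TO prompt_tokens")
    conn.execute(
        "ALTER TABLE users RENAME COLUMN generated_tokens TO completion_tokens"
    )
conn.close()
```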
diff --git a/nilai/routers/private.py b/nilai/routers/private.py
index 10edc751..853daa40 100644
--- a/nilai/routers/private.py
+++ b/nilai/routers/private.py
@@ -1,20 +1,17 @@
 # Fast API and serving
-import time
 from base64 import b64encode
-from typing import Any, List
-from uuid import uuid4
 
 from fastapi import APIRouter, Body, Depends, HTTPException
 
 from nilai.auth import get_user
 from nilai.crypto import sign_message
+from nilai.db import UserManager
 
 # Internal libraries
 from nilai.model import (
     AttestationResponse,
     ChatRequest,
     ChatResponse,
-    Choice,
     Message,
     Model,
     Usage,
@@ -24,6 +21,11 @@
 router = APIRouter()
 
 
+@router.get("/v1/usage", tags=["Usage"])
+async def get_usage(user: dict = Depends(get_user)) -> Usage:
+    return Usage(**UserManager.get_token_usage(user["userid"]))
+
+
 # Model Information Endpoint
 @router.get("/v1/model-info", tags=["Model"])
 async def get_model_info(user: str = Depends(get_user)) -> dict:
@@ -37,7 +39,7 @@ async def get_model_info(user: str = Depends(get_user)) -> dict:
 
 # Attestation Report Endpoint
 @router.get("/v1/attestation/report", tags=["Attestation"])
-async def get_attestation(user: str = Depends(get_user)) -> AttestationResponse:
+async def get_attestation(user: dict = Depends(get_user)) -> AttestationResponse:
     return AttestationResponse(
         verifying_key=state.verifying_key,
         cpu_attestation="...",
@@ -47,13 +49,13 @@
 
 # Available Models Endpoint
 @router.get("/v1/models", tags=["Model"])
-async def get_models(user: str = Depends(get_user)) -> dict[str, list[Model]]:
+async def get_models(user: dict = Depends(get_user)) -> dict[str, list[Model]]:
     return {"models": state.models}
 
 
 # Chat Completion Endpoint
 @router.post("/v1/chat/completions", tags=["Chat"])
-def chat_completion(
+async def chat_completion(
     req: ChatRequest = Body(
         ChatRequest(
             model=state.models[0].name,
@@ -63,17 +65,14 @@
             messages=[
             ],
         )
     ),
-    user: str = Depends(get_user),
+    user: dict = Depends(get_user),
 ) -> ChatResponse:
     if not req.messages or len(req.messages) == 0:
         raise HTTPException(status_code=400, detail="The 'messages' field is required.")
-
     if not req.model:
         raise HTTPException(status_code=400, detail="The 'model' field is required.")
 
     # Combine messages into a single prompt
-    print(req)
-    prompt = "\n".join([f"{msg.role}: {msg.content}" for msg in req.messages])
     prompt = [
@@ -83,38 +82,22 @@
         {
             "role": msg.role,
             "content": msg.content,
         }
         for msg in req.messages
     ]
 
     # Generate response
-    generated: List[Any] = state.chat_pipeline(
-        prompt, max_length=1024, num_return_sequences=1, truncation=True
-    )  # type: ignore
-    print(type(generated))
+    generated: dict = state.chat_pipeline.create_chat_completion(prompt)
 
     if not generated or len(generated) == 0:
         raise HTTPException(status_code=500, detail="The model returned no output.")
 
-    response = generated[0]["generated_text"][-1]
-    print(f"Prompt: {prompt}, Response: {response}")
-    usage = Usage(
-        prompt_tokens=sum(len(msg.content.split()) for msg in req.messages),
-        completion_tokens=len(response["content"].split()),
-        total_tokens=0,
-    )
-    usage.total_tokens = usage.prompt_tokens + usage.completion_tokens
     response = ChatResponse(
-        id=f"chat-{uuid4()}",
-        object="chat.completion",
-        created=int(time.time()),
-        model=req.model,
-        choices=[
-            Choice(
-                index=0,
-                message=Message(**response),
-                finish_reason="stop",
-                logprobs=None,
-            )
-        ],
-        usage=usage,
-        signature="",  # Will be filled later
+        signature="",
+        **generated,
     )
+    response.model = req.model
+
+    UserManager.update_token_usage(
+        user["userid"],
+        prompt_tokens=response.usage.prompt_tokens,
+        completion_tokens=response.usage.completion_tokens,
+    )
 
     # Sign the response
     response_json = response.model_dump_json()
     signature = sign_message(state.private_key, response_json)
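Two caveats on the handlers above. `Usage(**UserManager.get_token_usage(...))` raises a `TypeError` for an unknown userid, since `get_token_usage` can return `None` — worth guarding. And `chat_completion` is now `async def` but calls the blocking `create_chat_completion` directly, which stalls the event loop for the whole generation; the `Semaphore(2)` added to `AppState` below is never used. A sketch of how the two could be combined, assuming the semaphore is meant to cap concurrent generations:

```python
import asyncio
from functools import partial

from nilai.state import state


async def generate(prompt: list) -> dict:
    # Cap concurrent generations with AppState.sem and run the blocking
    # llama.cpp call in a worker thread so the event loop stays responsive.
    async with state.sem:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, partial(state.chat_pipeline.create_chat_completion, prompt)
        )
```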
diff --git a/nilai/state.py b/nilai/state.py
index ce0ed436..4fc802ad 100644
--- a/nilai/state.py
+++ b/nilai/state.py
@@ -1,31 +1,23 @@
-import os
 import time
+from asyncio import Semaphore
 
-import torch
 from dotenv import load_dotenv
-from transformers import pipeline
+from llama_cpp import Llama
 
 from nilai.crypto import generate_key_pair
 from nilai.model import Model
 
-# Load the .env file
-load_dotenv()
-
-# # Application State Initialization
-# torch.set_num_threads(1)
-# torch.set_num_interop_threads(1)
-
 
 class AppState:
     def __init__(self):
         self.private_key, self.public_key, self.verifying_key = generate_key_pair()
-        self.chat_pipeline = pipeline(
-            "text-generation",
-            model="meta-llama/Llama-3.2-1B-Instruct",
-            model_kwargs={"torch_dtype": torch.bfloat16},
-            device_map="auto",
-            token=os.getenv("HUGGINGFACE_API_TOKEN"),
+        self.chat_pipeline = Llama.from_pretrained(
+            repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+            filename="Llama-3.2-1B-Instruct-Q5_K_S.gguf",
+            n_threads=16,
+            verbose=False,
         )
+        self.sem = Semaphore(2)
         self.models = [
             Model(
                 id="meta-llama/Llama-3.2-1B-Instruct",
@@ -60,4 +52,5 @@ def uptime(self):
         return ", ".join(parts)
 
 
+load_dotenv()
 state = AppState()
diff --git a/pyproject.toml b/pyproject.toml
index 57cc25b1..f589b136 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = [
     "cryptography>=43.0.3",
     "fastapi[standard]>=0.115.5",
     "gunicorn>=23.0.0",
+    "llama-cpp-python>=0.3.2",
    "python-dotenv>=1.0.1",
     "sqlalchemy>=2.0.36",
     "torch>=2.5.1",
diff --git a/tests/model_execution_0.py b/tests/model_execution_0.py
new file mode 100644
index 00000000..f7bbe1ae
--- /dev/null
+++ b/tests/model_execution_0.py
@@ -0,0 +1,38 @@
+import os
+import time
+
+import torch
+from dotenv import load_dotenv
+from transformers import pipeline
+
+# Load the .env file
+load_dotenv()
+
+# Application State Initialization
+torch.set_num_threads(32)
+torch.set_num_interop_threads(32)
+
+
+chat_pipeline = pipeline(
+    "text-generation",
+    model="meta-llama/Llama-3.2-1B-Instruct",
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="cpu",
+    token=os.getenv("HUGGINGFACE_API_TOKEN"),
+)
+
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "What is your name?"},
+]
+
+start = time.time()
+# Generate response
+generated = chat_pipeline(
+    messages, max_length=1024, num_return_sequences=1, truncation=True
+)  # type: ignore
+
+end = time.time()
+
+print(generated)
+print(end - start)
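This script and the three that follow compare the transformers, ONNX Runtime, and llama.cpp backends, but each times individual calls, and the first call includes warmup. A small shared helper along these lines (a sketch, not part of the diff) would make the numbers comparable — e.g. `bench(lambda: llm.create_chat_completion(messages))`:

```python
import statistics
import time


def bench(fn, runs: int = 5):
    """Time fn over several runs, reporting the first (warmup) run separately."""
    t0 = time.perf_counter()
    fn()
    warmup = time.perf_counter() - t0
    times = []
    for _ in range(runs):
        t0 = time.perf_counter()
        fn()
        times.append(time.perf_counter() - t0)
    return warmup, statistics.mean(times), statistics.stdev(times)
```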
diff --git a/tests/model_execution_1.py b/tests/model_execution_1.py
new file mode 100644
index 00000000..2ef17ab4
--- /dev/null
+++ b/tests/model_execution_1.py
@@ -0,0 +1,49 @@
+import time
+
+from onnxruntime import InferenceSession
+from optimum.onnxruntime import ORTModelForCausalLM
+from transformers import AutoTokenizer
+
+# Define the model directory and ONNX export location
+model_name = "meta-llama/Llama-3.2-1B-Instruct"
+onnx_export_dir = "./onnx_model"
+
+# Export the model
+model = ORTModelForCausalLM.from_pretrained(model_name, from_transformers=True)
+model.save_pretrained(onnx_export_dir)
+
+# Save the tokenizer for later use
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer.save_pretrained(onnx_export_dir)
+
+
+# Load the ONNX model and tokenizer
+onnx_model_path = "./onnx_model/model.onnx"
+tokenizer = AutoTokenizer.from_pretrained("./onnx_model")
+
+# Create an ONNX Runtime session
+session = InferenceSession(onnx_model_path)
+
+# Input messages
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "What is your name?"},
+]
+
+# Prepare input text
+input_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+
+# Tokenize input text
+inputs = tokenizer(input_text, return_tensors="pt")
+print("START:")
+# Run inference
+start = time.time()
+onnx_inputs = {session.get_inputs()[0].name: inputs["input_ids"].numpy()}
+onnx_output = session.run(None, onnx_inputs)
+end = time.time()
+
+# Decode the output
+output_text = tokenizer.decode(onnx_output[0][0], skip_special_tokens=True)
+
+print(output_text)
+print(f"Time taken: {end - start} seconds")
diff --git a/tests/model_execution_2.py b/tests/model_execution_2.py
new file mode 100644
index 00000000..821279ae
--- /dev/null
+++ b/tests/model_execution_2.py
@@ -0,0 +1,34 @@
+import os
+import time
+
+from dotenv import load_dotenv
+from optimum.pipelines import pipeline
+
+# Load the .env file
+load_dotenv()
+
+
+chat_pipeline = pipeline(
+    "text-generation",
+    model="meta-llama/Llama-3.2-1B-Instruct",
+    accelerator="ort",
+    token=os.getenv("HUGGINGFACE_API_TOKEN"),
+)
+
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "What is your name?"},
+]
+
+print("start")
+for i in range(10):
+    start = time.time()
+    # Generate response
+    generated = chat_pipeline(
+        messages, max_length=1024, num_return_sequences=1, truncation=True
+    )  # type: ignore
+
+    end = time.time()
+
+    print(generated)
+    print(end - start)
diff --git a/tests/model_execution_3.py b/tests/model_execution_3.py
new file mode 100644
index 00000000..33c4ace7
--- /dev/null
+++ b/tests/model_execution_3.py
@@ -0,0 +1,25 @@
+import time
+
+from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q5_K_S.gguf",
+)
+
+
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "What is your name?"},
+]
+
+print("start")
+for i in range(10):
+    start = time.time()
+    # Generate response
+    generated = llm.create_chat_completion(messages)
+
+    end = time.time()
+
+    print(generated)
+    print(end - start)
diff --git a/tests/test_cryptography.py b/tests/test_cryptography.py
index 51fb4cbd..702ca540 100644
--- a/tests/test_cryptography.py
+++ b/tests/test_cryptography.py
@@ -2,7 +2,6 @@
 
 import pytest
 from cryptography.exceptions import InvalidSignature
-from cryptography.hazmat.primitives import serialization
 from cryptography.hazmat.primitives.asymmetric import ec
 
 from nilai.crypto import generate_key_pair, sign_message, verify_signature
diff --git a/tests/test_db.py b/tests/test_db.py
index 574bd26d..21d43451 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -6,8 +6,9 @@
 from sqlalchemy.pool import StaticPool
 
 import nilai.db as db
+
 # Import the classes and functions to test
-from nilai.db import Base, DatabaseConfig, User, UserManager, get_db_session
+from nilai.db import Base, UserManager
 
 
 @pytest.fixture(scope="function")
@@ -123,8 +124,8 @@ def test_update_token_usage(self, user_manager):
     def test_get_all_users(self, user_manager):
         """Test retrieving all users."""
         # Insert multiple users
-        user1 = user_manager.insert_user("User 1")
-        user2 = user_manager.insert_user("User 2")
+        _ = user_manager.insert_user("User 1")
+        _ = user_manager.insert_user("User 2")
 
         # Retrieve all users
         all_users = user_manager.get_all_users()
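The new `UserManager.get_token_usage` has no coverage in this diff. A test along these lines could slot into the same class, reusing the existing `user_manager` fixture (a sketch under that assumption):

```python
    def test_get_token_usage(self, user_manager):
        """Token counts should round-trip through the new accessor."""
        user = user_manager.insert_user("Carol")
        user_manager.update_token_usage(
            user["userid"], prompt_tokens=5, completion_tokens=7
        )
        usage = user_manager.get_token_usage(user["userid"])
        assert usage == {
            "prompt_tokens": 5,
            "completion_tokens": 7,
            "total_tokens": 12,
        }
```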
"https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550 }, +] + [[package]] name = "dnspython" version = "2.7.0" @@ -440,6 +449,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 }, ] +[[package]] +name = "llama-cpp-python" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "diskcache" }, + { name = "jinja2" }, + { name = "numpy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/0e/ff129005a33b955088fc7e4ecb57e5500b604fb97eca55ce8688dbe59680/llama_cpp_python-0.3.2.tar.gz", hash = "sha256:8fbf246a55a999f45015ed0d48f91b4ae04ae959827fac1cd6ac6ec65aed2e2f", size = 64964148 } + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -535,6 +556,7 @@ dependencies = [ { name = "cryptography" }, { name = "fastapi", extra = ["standard"] }, { name = "gunicorn" }, + { name = "llama-cpp-python" }, { name = "python-dotenv" }, { name = "sqlalchemy" }, { name = "torch" }, @@ -557,6 +579,7 @@ requires-dist = [ { name = "cryptography", specifier = ">=43.0.3" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.115.5" }, { name = "gunicorn", specifier = ">=23.0.0" }, + { name = "llama-cpp-python", specifier = ">=0.3.2" }, { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "sqlalchemy", specifier = ">=2.0.36" }, { name = "torch", specifier = ">=2.5.1" },