2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
```diff
@@ -34,7 +34,7 @@ jobs:
        run: uv run ruff format --check

      - name: Run Ruff linting
-       run: uv run ruff check
+       run: uv run ruff check --exclude packages/verifier/

      - name: Run tests
        run: uv run pytest -v
```
2 changes: 1 addition & 1 deletion README.md
````diff
@@ -13,7 +13,7 @@ docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build

 For production environments:
 ```shell
-docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d --build
+docker compose -f docker-compose.gpu.yml -f docker-compose.prod.yml up -d --build
 ```

 ## Manual Deployment
````
27 changes: 20 additions & 7 deletions docker-compose.gpu.yml
```diff
@@ -23,6 +23,13 @@ services:
         condition: service_healthy
     volumes:
       - ${PWD}/db/:/app/db/ # sqlite database for users
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
     environment:
       - ETCD_HOST=etcd
       - ETCD_PORT=2379
@@ -33,14 +40,19 @@ services:
     build:
       context: .
       dockerfile: docker/vllm.Dockerfile
-    runtime: nvidia
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
     ipc: host
     ulimits:
       memlock: -1
       stack: 67108864
+    env_file:
+      - .env
     depends_on:
       etcd:
         condition: service_healthy
@@ -54,8 +66,6 @@
       - SVC_PORT=8000
       - ETCD_HOST=etcd
       - ETCD_PORT=2379
-    env_file:
-      - .env
     volumes:
       - hugging_face_models:/root/.cache/huggingface # cache models
     networks:
@@ -64,14 +74,19 @@
     build:
       context: .
       dockerfile: docker/vllm.Dockerfile
-    runtime: nvidia
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
     ipc: host
     ulimits:
       memlock: -1
       stack: 67108864
+    env_file:
+      - .env
     depends_on:
       etcd:
         condition: service_healthy
@@ -85,8 +100,6 @@
       - SVC_PORT=8000
       - ETCD_HOST=etcd
       - ETCD_PORT=2379
-    env_file:
-      - .env
     volumes:
       - hugging_face_models:/root/.cache/huggingface # cache models
     networks:
@@ -95,4 +108,4 @@ volumes:
   hugging_face_models:

 networks:
-  backend_net:
\ No newline at end of file
+  backend_net:
```
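A note on this change: the `deploy.resources.reservations.devices` block is the current Compose-spec way to request GPUs and replaces the legacy `runtime: nvidia` shortcut; it still relies on the NVIDIA Container Toolkit being installed on the host. A quick way to confirm the reservation actually reached a container is a check like the sketch below (it assumes the vllm image ships with PyTorch, which vLLM images do):

```python
# Minimal sketch: run inside one of the vllm service containers
# (e.g. via `docker compose exec <service> python check_gpu.py`)
# to confirm the GPU reservation from docker-compose.gpu.yml took effect.
import torch

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No CUDA devices visible; check the NVIDIA Container Toolkit on the host.")
```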
4 changes: 3 additions & 1 deletion nilai-api/pyproject.toml
```diff
@@ -11,7 +11,7 @@ authors = [
 requires-python = ">=3.12"
 dependencies = [
     "accelerate>=1.1.1",
-    "cryptography>=44.0.0",
+    "cryptography>=43.0.1",
     "fastapi[standard]>=0.115.5",
     "gunicorn>=23.0.0",
     "nilai-common",
@@ -26,6 +26,7 @@ dependencies = [
     "pg8000>=1.31.2",
     "asyncpg>=0.30.0",
     "greenlet>=3.1.1",
+    "verifier",
 ]
 
 
@@ -35,3 +36,4 @@
 
 [tool.uv.sources]
 nilai-common = { workspace = true }
+verifier = { workspace = true }
```
1 change: 1 addition & 0 deletions nilai-api/src/nilai_api/db.py
```diff
@@ -243,6 +243,7 @@ async def log_query(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
             total_tokens=total_tokens,
+            query_timestamp=datetime.now(),
         )
         session.add(query_log)
         await session.commit()
```
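One caveat on the new `query_timestamp=datetime.now()` argument: `datetime.now()` without arguments produces a naive local-time value, so logs written from hosts in different timezones would not compare cleanly. If the `QueryLog` column permits it (the model definition is outside this diff), a timezone-aware UTC stamp is the safer choice; a hypothetical alternative:

```python
from datetime import datetime, timezone

# Assumption: the query_timestamp column is timezone-aware. datetime.now()
# is naive local time, while datetime.now(timezone.utc) is unambiguous
# across hosts.
query_timestamp = datetime.now(timezone.utc)
```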
33 changes: 32 additions & 1 deletion nilai-api/src/nilai_api/state.py
```diff
@@ -8,6 +8,10 @@
 from nilai_api.sev.sev import sev
 from nilai_common import SETTINGS, ModelServiceDiscovery
 from nilai_common.api_model import ModelEndpoint
+from verifier.cc_admin import collect_gpu_evidence, attest
+import secrets
+import json
+import base64
 
 logger = logging.getLogger("uvicorn.error")
 
@@ -36,7 +40,34 @@ def cpu_attestation(self) -> str:
 
     @property
     def gpu_attestation(self) -> str:
-        return self._gpu_quote
+        # Check if GPU is available
+        try:
+            nonce = secrets.token_bytes(32).hex()
+            arguments_as_dictionary = {
+                "nonce": nonce,
+                "verbose": False,
+                "test_no_gpu": False,
+                "rim_root_cert": None,
+                "rim_service_url": None,
+                "ocsp_service_url": None,
+                "ocsp_attestation_settings": "default",
+                "allow_hold_cert": None,
+                "ocsp_validity_extension": None,
+                "ocsp_cert_revocation_extension_device": None,
+                "ocsp_cert_revocation_extension_driver_rim": None,
+                "ocsp_cert_revocation_extension_vbios_rim": None,
+            }
+            evidence_list = collect_gpu_evidence(
+                nonce,
+            )
+            result, jwt_token = attest(arguments_as_dictionary, nonce, evidence_list)
+            self._gpu_quote = base64.b64encode(
+                json.dumps({"result": result, "jwt_token": jwt_token}).encode()
+            ).decode()
+            return self._gpu_quote
+        except Exception as e:
+            logging.error("Could not attest GPU: %s", e)
+            return self._gpu_quote
 
     @property
     def uptime(self):
```
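For consumers of this property: the encoded value is just base64 over a small JSON object, so a client that fetches the attestation (the serving endpoint is outside this diff) can recover the verifier verdict and the JWT with a sketch like this (the function name is an assumption):

```python
import base64
import json

def decode_gpu_attestation(gpu_quote: str) -> dict:
    """Reverse the encoding done in gpu_attestation above.

    Returns a dict with "result" (the verifier's overall verdict) and
    "jwt_token" (the detailed attestation claims), per the JSON payload
    built in state.py.
    """
    return json.loads(base64.b64decode(gpu_quote))
```

Note also that the attestation runs on every property access and, on failure, falls back to returning the last cached `self._gpu_quote`, so a host without a GPU keeps serving whatever the attribute was initialized to.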
2 changes: 1 addition & 1 deletion nilai-models/src/nilai_models/daemon.py
```diff
@@ -14,7 +14,7 @@
 logger = logging.getLogger(__name__)
 
 
-async def get_metadata(num_retries=10):
+async def get_metadata(num_retries=30):
     """Fetch model metadata from model
     service and return as ModelMetadata object"""
     current_retries = 0
```
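Context for the retry bump: model containers can take several minutes to pull weights and start serving, so a budget of 10 attempts was evidently too tight. The surrounding retry loop is not shown in this diff; a rough sketch of the pattern (the helper name and delay are assumptions, not the actual implementation):

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

async def get_metadata(num_retries=30, delay_seconds=10):
    """Sketch: poll the model service until it responds or retries run out."""
    current_retries = 0
    while current_retries < num_retries:
        try:
            return await fetch_model_metadata()  # hypothetical helper
        except Exception as exc:
            current_retries += 1
            logger.warning(
                "Metadata fetch failed (%d/%d): %s", current_retries, num_retries, exc
            )
            await asyncio.sleep(delay_seconds)
    raise RuntimeError("Model service did not become ready in time")
```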