diff --git a/docker/compose/docker-compose.deepseek-14b-gpu.yml b/docker/compose/docker-compose.deepseek-14b-gpu.yml
index ca59872f..7b185405 100644
--- a/docker/compose/docker-compose.deepseek-14b-gpu.yml
+++ b/docker/compose/docker-compose.deepseek-14b-gpu.yml
@@ -23,8 +23,9 @@ services:
     command: >
       --model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
       --gpu-memory-utilization 0.4
-      --max-model-len 10000
+      --max-model-len 10000
       --tensor-parallel-size 1
+      --uvicorn-log-level WARNING
     environment:
       - SVC_HOST=deepseek_14b_gpu
       - SVC_PORT=8000
@@ -36,7 +37,7 @@ services:
     networks:
       - backend_net
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
       retries: 3
       start_period: 60s
diff --git a/docker/compose/docker-compose.dolphin-8b-gpu.yml b/docker/compose/docker-compose.dolphin-8b-gpu.yml
new file mode 100644
index 00000000..79c27338
--- /dev/null
+++ b/docker/compose/docker-compose.dolphin-8b-gpu.yml
@@ -0,0 +1,51 @@
+services:
+  dolphin_8b_gpu:
+    build:
+      context: .
+      dockerfile: docker/vllm.Dockerfile
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    ipc: host
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    env_file:
+      - .env
+    restart: unless-stopped
+    depends_on:
+      etcd:
+        condition: service_healthy
+    command: >
+      --model cognitivecomputations/Dolphin3.0-Llama3.1-8B
+      --gpu-memory-utilization 0.5
+      --max-model-len 10000
+      --tensor-parallel-size 1
+      --enable-auto-tool-choice
+      --tool-call-parser llama3_json
+      --uvicorn-log-level WARNING
+    environment:
+      - SVC_HOST=dolphin_8b_gpu
+      - SVC_PORT=8000
+      - ETCD_HOST=etcd
+      - ETCD_PORT=2379
+      - TOOL_SUPPORT=true
+    volumes:
+      - hugging_face_models:/root/.cache/huggingface # cache models
+    networks:
+      - backend_net
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      retries: 3
+      start_period: 60s
+      timeout: 10s
+volumes:
+  hugging_face_models:
+
+networks:
+  backend_net:
diff --git a/docker/compose/docker-compose.llama-3b-gpu.yml b/docker/compose/docker-compose.llama-3b-gpu.yml
index f893333b..3008898d 100644
--- a/docker/compose/docker-compose.llama-3b-gpu.yml
+++ b/docker/compose/docker-compose.llama-3b-gpu.yml
@@ -23,10 +23,11 @@ services:
     command: >
       --model meta-llama/Llama-3.2-3B-Instruct
       --gpu-memory-utilization 0.3
-      --max-model-len 10000
+      --max-model-len 10000
       --tensor-parallel-size 1
       --enable-auto-tool-choice
       --tool-call-parser llama3_json
+      --uvicorn-log-level WARNING
     environment:
       - SVC_HOST=llama_3b_gpu
       - SVC_PORT=8000
@@ -47,4 +48,4 @@ volumes:
   hugging_face_models:
 
 networks:
-  backend_net:
\ No newline at end of file
+  backend_net:
diff --git a/docker/compose/docker-compose.llama-8b-gpu.yml b/docker/compose/docker-compose.llama-8b-gpu.yml
index 4dfc6045..247b34cb 100644
--- a/docker/compose/docker-compose.llama-8b-gpu.yml
+++ b/docker/compose/docker-compose.llama-8b-gpu.yml
@@ -22,11 +22,12 @@ services:
       condition: service_healthy
     command: >
       --model meta-llama/Llama-3.1-8B-Instruct
-      --gpu-memory-utilization 0.5
-      --max-model-len 10000
+      --gpu-memory-utilization 0.5
+      --max-model-len 10000
       --tensor-parallel-size 1
       --enable-auto-tool-choice
       --tool-call-parser llama3_json
+      --uvicorn-log-level WARNING
     environment:
       - SVC_HOST=llama_8b_gpu
       - SVC_PORT=8000
@@ -47,4 +48,4 @@ volumes:
   hugging_face_models:
 
 networks:
-  backend_net:
\ No newline at end of file
+  backend_net:
diff --git a/uv.lock b/uv.lock
index 08820770..dd8b35d7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1188,7 +1188,6 @@ dependencies = [
     { name = "nilai-api" },
     { name = "nilai-common" },
     { name = "nilai-models" },
-    { name = "pre-commit" },
     { name = "verifier" },
 ]
 
@@ -1196,6 +1195,7 @@ dependencies = [
 dev = [
     { name = "black" },
     { name = "isort" },
+    { name = "pre-commit" },
     { name = "pyright" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
@@ -1210,7 +1210,6 @@ requires-dist = [
     { name = "nilai-api", editable = "nilai-api" },
     { name = "nilai-common", editable = "packages/nilai-common" },
     { name = "nilai-models", editable = "nilai-models" },
-    { name = "pre-commit", specifier = ">=4.1.0" },
     { name = "verifier", editable = "packages/verifier" },
 ]
 
@@ -1218,6 +1217,7 @@ requires-dist = [
 dev = [
     { name = "black", specifier = ">=24.10.0" },
     { name = "isort", specifier = ">=5.13.2" },
+    { name = "pre-commit", specifier = ">=4.1.0" },
     { name = "pyright", specifier = ">=1.1" },
     { name = "pytest", specifier = ">=8.3.3" },
     { name = "pytest-asyncio", specifier = ">=0.25.0" },