Local LLM multi-GPU support #1391

Merged 10 commits on Jan 12, 2024
2 changes: 1 addition & 1 deletion Dockerfile
@@ -36,4 +36,4 @@ COPY --from=compile-image /root/nltk_data /root/nltk_data

ENV PATH="/opt/venv/bin:$PATH"

EXPOSE 8001
EXPOSE 8001
45 changes: 45 additions & 0 deletions Dockerfile-gpu
@@ -0,0 +1,45 @@
# Define the CUDA SDK version you need
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y git build-essential \
        python3 python3-pip python3.10-venv libpq-dev gcc wget \
        ocl-icd-opencl-dev opencl-headers clinfo \
        libclblast-dev libopenblas-dev \
    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

# Create a virtual environment and activate it
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies from requirements.txt
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Download the NLTK data needed at runtime
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
    python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"

# Copy the application code
COPY . .

ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1

RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.7 --force-reinstall --upgrade --no-cache-dir

# Make necessary scripts executable
RUN chmod +x ./entrypoint.sh ./wait-for-it.sh ./install_tool_dependencies.sh ./entrypoint_celery.sh

# Set environment variable to point to the custom libllama.so
# ENV LLAMA_CPP_LIB=/app/llama.cpp/libllama.so

EXPOSE 8001

CMD ["./entrypoint.sh"]
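To sanity-check this image outside of Compose, a minimal sketch (the `superagi-backend-gpu` tag is illustrative, and the host needs the NVIDIA Container Toolkit for `--gpus all` to work):
```
docker build -f Dockerfile-gpu -t superagi-backend-gpu .
docker run --rm --gpus all superagi-backend-gpu nvidia-smi
```
If `nvidia-smi` lists your devices, CUDA passthrough is wired up before the rest of the stack gets involved.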
16 changes: 12 additions & 4 deletions README.MD
@@ -126,10 +126,18 @@ cd SuperAGI

4. Ensure that Docker is installed on your system. You can download and install it from [here](https://docs.docker.com/get-docker/).

5. Once you have Docker Desktop running, run the following command in the in the SuperAGI directory :
```
docker-compose up --build
```
5. Once you have Docker Desktop running, run the following command in the SuperAGI directory:

a. For regular usage:
```
docker compose -f docker-compose.yaml up --build
```

b. If you want to use SuperAGI with Local LLMs and have a GPU, run the following command:
```
docker compose -f docker-compose-gpu.yml up --build
```


6. Open your web browser and navigate to http://localhost:3000 to access SuperAGI.

1 change: 1 addition & 0 deletions config_template.yaml
@@ -122,3 +122,4 @@ ENGINE_ID: "stable-diffusion-xl-beta-v2-2-2"
## To use Qdrant for vector store
#QDRANT_HOST_NAME: YOUR_QDRANT_HOST_NAME
#QDRANT_PORT: YOUR_QDRANT_PORT
#GPU_LAYERS: NUMBER OF MODEL LAYERS TO OFFLOAD TO THE GPU WHEN USING LOCAL LLMS
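A hypothetical example entry; 32 offloads 32 layers, while the loader's fallback of -1 asks llama.cpp to offload every layer it can for the model:
```
GPU_LAYERS: 32
```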
97 changes: 97 additions & 0 deletions docker-compose-gpu.yml
@@ -0,0 +1,97 @@
version: '3.8'
services:
  backend:
    volumes:
      - "./:/app"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/wait-for-it.sh", "super__postgres:5432", "-t", "60", "--", "/app/entrypoint.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  celery:
    volumes:
      - "./:/app"
      - "${EXTERNAL_RESOURCE_DIR:-./workspace}:/app/ext"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/entrypoint_celery.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  gui:
    build:
      context: ./gui
      args:
        NEXT_PUBLIC_API_BASE_URL: "/api"
    networks:
      - super_network
    # volumes:
    #   - ./gui:/app
    #   - /app/node_modules/
    #   - /app/.next/

  super__redis:
    image: "redis/redis-stack-server:latest"
    networks:
      - super_network
    # uncomment to expose redis port to host
    # ports:
    #   - "6379:6379"
    volumes:
      - redis_data:/data

  super__postgres:
    image: "docker.io/library/postgres:15"
    environment:
      - POSTGRES_USER=superagi
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=super_agi_main
    volumes:
      - superagi_postgres_data:/var/lib/postgresql/data/
    networks:
      - super_network
    # uncomment to expose postgres port to host
    # ports:
    #   - "5432:5432"

  proxy:
    image: nginx:stable-alpine
    ports:
      - "3000:80"
    networks:
      - super_network
    depends_on:
      - backend
      - gui
    volumes:
      - ./nginx/default.conf:/etc/nginx/conf.d/default.conf

networks:
  super_network:
    driver: bridge

volumes:
  superagi_postgres_data:
  redis_data:
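To bring the stack up with this file and confirm the backend container actually sees the GPU, a sketch (assumes Docker Compose v2, which honors the `deploy.resources` device reservations above, plus the NVIDIA Container Toolkit on the host; `nvidia-smi` is injected into the container by the NVIDIA runtime):
```
docker compose -f docker-compose-gpu.yml up --build -d
docker compose -f docker-compose-gpu.yml exec backend nvidia-smi
```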
2 changes: 1 addition & 1 deletion requirements.txt
@@ -158,4 +158,4 @@ google-generativeai==0.1.0
unstructured==0.8.1
ai21==1.2.6
typing-extensions==4.5.0
llama_cpp_python==0.2.7
llama_cpp_python==0.2.7
2 changes: 1 addition & 1 deletion superagi/helper/llm_loader.py
@@ -22,7 +22,7 @@ def model(self):
        if self._model is None:
            try:
                self._model = Llama(
                    model_path="/app/local_model_path", n_ctx=self.context_length)
                    model_path="/app/local_model_path", n_ctx=self.context_length,
                    n_gpu_layers=int(get_config('GPU_LAYERS', -1)))  # cast to int: get_config may return a string
            except Exception as e:
                logger.error(e)
        return self._model
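One way to confirm the offload takes effect at runtime is to watch the model-load logs; llama.cpp builds of this era print a line along the lines of `llm_load_tensors: offloaded 35/35 layers to GPU` (exact wording varies by version):
```
docker compose -f docker-compose-gpu.yml logs backend | grep -i offloaded
```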
@@ -1,3 +1,5 @@
import unittest
from unittest.mock import patch
import pytest
from superagi.tools.duck_duck_go.duck_duck_go_search import DuckDuckGoSearchTool

@@ -11,9 +13,13 @@ def test_get_raw_duckduckgo_results_empty_query(self):
        result = self.your_obj.get_raw_duckduckgo_results(query)
        assert result == expected_result

    def test_get_raw_duckduckgo_results_valid_query(self):
    @patch('superagi.tools.duck_duck_go.duck_duck_go_search.DuckDuckGoSearchTool.get_raw_duckduckgo_results')
    def test_get_raw_duckduckgo_results_valid_query(self, mock_get_raw_duckduckgo_results):
        query = "python"
        expected_result_length = 10
        mock_results = ['result1', 'result2', 'result3', 'result4', 'result5',
                        'result6', 'result7', 'result8', 'result9', 'result10']
        mock_get_raw_duckduckgo_results.return_value = mock_results
        result = self.your_obj.get_raw_duckduckgo_results(query)
        assert len(result) == expected_result_length
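Since the diff does not show this test file's path, the patched test can be run by selecting it by name with pytest's `-k` filter:
```
pytest -k test_get_raw_duckduckgo_results_valid_query
```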
