forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request vllm-project#2 from Bellk17/main
Merge latest
- Loading branch information
Showing
168 changed files
with
5,338 additions
and
1,878 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# This script builds the Neuron Docker image and runs the API server inside
# the container. It serves as a sanity check for compilation and basic model
# usage.

# Stop at the first command that exits with a non-zero status.
set -e

# Log Docker in to the AWS ECR registry, then try building the image from
# Dockerfile.neuron.
aws ecr get-login-password --region us-west-2 \
  | docker login \
      --username AWS \
      --password-stdin \
      763104351884.dkr.ecr.us-west-2.amazonaws.com
docker build -t neuron -f Dockerfile.neuron .

# Setup cleanup.
# Force-removes the container named "neuron"; a failing `docker rm` is
# deliberately ignored so that cleanup never aborts the script.
remove_docker_container() {
  docker rm -f neuron || true
}

# Run the cleanup whenever the script exits, and once right away before a new
# container with the same name is started.
trap remove_docker_container EXIT
remove_docker_container

# Run the image: start the API server in a container named "neuron".
# The trailing '&' puts the container in the background so the script can
# continue and poll the server.
docker run \
  --device=/dev/neuron0 \
  --device=/dev/neuron1 \
  --network host \
  --name neuron \
  neuron \
  python3 -m vllm.entrypoints.api_server \
    --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --max-num-seqs 8 \
    --max-model-len 128 \
    --block-size 128 \
    --device neuron \
    --tensor-parallel-size 2 &

#######################################
# Wait for the server to start.
# Polls localhost:8000/health once per second until it answers HTTP 200.
# Arguments: $1 - optional maximum number of seconds to wait (default 300)
# Outputs:   writes a timeout message to stderr when giving up
# Returns:   0 once the endpoint reports 200, 1 on timeout
#######################################
wait_for_server_to_start() {
  local timeout="${1:-300}"
  local counter=0

  # curl stays inside the test expression on purpose: while the server is
  # still booting curl itself exits non-zero (connection refused), and that
  # must not abort a caller running under `set -e`.
  while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
    sleep 1
    counter=$((counter + 1))
    if [ "$counter" -ge "$timeout" ]; then
      echo "Timeout after $timeout seconds" >&2
      # Report the failure so the caller does not go on to test a server
      # that never became healthy.
      return 1
    fi
  done
}
wait_for_server_to_start

# Test a simple prompt.
# --fail makes curl exit non-zero when the server answers with an HTTP error
# status (>= 400). This is the last command of the script, so its exit status
# decides whether the sanity check passes.
curl --fail -X POST -H "Content-Type: application/json" \
  localhost:8000/generate \
  -d '{"prompt": "San Francisco is a"}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Type-checks selected vllm packages with mypy on every supported Python version.
name: mypy

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # NOTE(review): the job id is "ruff" although this job runs mypy. It is kept
  # unchanged here because reported check names may be derived from it;
  # consider renaming it to "mypy" once nothing depends on the old name.
  ruff:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        # mypy is pinned; the types-* packages provide stubs for third-party
        # libraries. Each stub package is installed exactly once.
        run: |
          python -m pip install --upgrade pip
          pip install mypy==1.9.0
          pip install types-setuptools
          pip install types-PyYAML
          pip install types-requests
      - name: Mypy
        # One invocation per package; --follow-imports=skip keeps each run
        # limited to the files listed on its command line.
        run: |
          mypy vllm/attention/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/core/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/distributed/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/entrypoints/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/executor/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/usage/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/transformers_utils/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/engine/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/worker/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/spec_decode/*.py --follow-imports=skip --config-file pyproject.toml
          mypy vllm/model_executor/*.py --follow-imports=skip --config-file pyproject.toml
          # TODO(sang): Follow up
          # mypy vllm/lora/*.py --follow-imports=skip --config-file pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Builds an image with vLLM installed for the Neuron backend on top of a
# Neuron PyTorch inference base image.

# default base image
ARG BASE_IMAGE="763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.1.1-neuronx-py310-sdk2.17.0-ubuntu20.04"

FROM $BASE_IMAGE

# An ARG declared before FROM lives outside the build stage. Redeclare it
# (without a value) so the default above is visible to the RUN below;
# otherwise the message prints an empty image name.
ARG BASE_IMAGE
RUN echo "Base image is $BASE_IMAGE"

# Install some basic utilities
RUN apt-get update && apt-get install python3 python3-pip -y

### Mount Point ###
# When launching the container, mount the code directory to /app
ARG APP_MOUNT=/app
# Shell form on purpose: a bracketed list must be valid JSON with
# double-quoted strings, otherwise it is split on whitespace and "[" and "]"
# are treated as volume paths as well.
VOLUME ${APP_MOUNT}
WORKDIR ${APP_MOUNT}

RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install --no-cache-dir fastapi ninja tokenizers pandas
RUN python3 -m pip install sentencepiece transformers==4.36.2 -U
RUN python3 -m pip install transformers-neuronx --extra-index-url=https://pip.repos.neuron.amazonaws.com -U
RUN python3 -m pip install --pre neuronx-cc==2.12.* --extra-index-url=https://pip.repos.neuron.amazonaws.com -U

# Copy only what is needed to install vLLM from source.
COPY ./vllm /app/vllm/vllm
COPY ./setup.py /app/vllm/setup.py
COPY ./requirements-common.txt /app/vllm/requirements-common.txt
COPY ./requirements-neuron.txt /app/vllm/requirements-neuron.txt

RUN cd /app/vllm \
    && python3 -m pip install -U -r requirements-neuron.txt

# Set before the editable install below so that the install step sees it.
ENV VLLM_BUILD_WITH_NEURON=1
# Each RUN starts again from WORKDIR, so no trailing `cd ..` is needed.
RUN cd /app/vllm \
    && pip install -e .

CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,7 @@ | |
"vllm._C", | ||
"numpy", | ||
"tqdm", | ||
"tensorizer", | ||
] | ||
|
||
for mock_target in autodoc_mock_imports: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.