Skip to content

Commit

Permalink
added correct neuron patch and updated docker image
Browse files Browse the repository at this point in the history
  • Loading branch information
ratnopamc committed Aug 20, 2024
1 parent 78a0e71 commit 6379ac7
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@ index e7f0e887..87564b76 100644
assert execute_model_req.num_lookahead_slots == 0, (
"lookahead not supported for Neuron backend.")


diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index cd29db7..6814348 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -321,7 +321,6 @@ class EngineArgs:
parser.add_argument('--block-size',
type=int,
default=EngineArgs.block_size,
- choices=[8, 16, 32],
help='Token block size for contiguous chunks of '
'tokens.')
parser.add_argument('--block-size',
type=int,
default=EngineArgs.block_size,
- choices=[8, 16, 32],
help='Token block size for contiguous chunks of '
'tokens.')

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# Step1: cd gen-ai/inference/vllm-rayserve-inf2
# Step2 (Optional): Build a Docker image, push it to ECR, and reference that image in the YAML
# Step3: kubectl apply -f vllm-rayserve-deployment.yaml
# Step4: python3 openai-client.py
# Step4.a: pip install openai
# Step4.b: python3 openai-client.py
#-------------------------------------------------------------------

apiVersion: v1
Expand Down Expand Up @@ -190,7 +191,7 @@ spec:
schedulerName: my-scheduler # Correct placement
containers:
- name: head
image: public.ecr.aws/data-on-eks/vllm-ray-neuron-mistral7b:latest
image: public.ecr.aws/data-on-eks/vllm-ray2.32.0-inf2-llama3:latest
imagePullPolicy: Always
lifecycle:
preStop:
Expand Down Expand Up @@ -247,7 +248,7 @@ spec:
schedulerName: my-scheduler # Correct placement
containers:
- name: worker
image: public.ecr.aws/data-on-eks/vllm-ray-neuron-mistral7b:latest
image: public.ecr.aws/data-on-eks/vllm-ray2.32.0-inf2-llama3:latest
imagePullPolicy: Always
lifecycle:
preStop:
Expand Down

0 comments on commit 6379ac7

Please sign in to comment.