diff --git a/deepseek-r1-aws.md b/deepseek-r1-aws.md index 775393e9e8..b80e504fb6 100644 --- a/deepseek-r1-aws.md +++ b/deepseek-r1-aws.md @@ -191,9 +191,9 @@ model_name = hf_model_id.split("/")[-1].lower() # Hub Model configuration hub = { "HF_MODEL_ID": model_id, - "HF_NUM_CORES": "24", + "HF_NUM_CORES": "16", "HF_AUTO_CAST_TYPE": "bf16", - "MAX_BATCH_SIZE": "4", + "MAX_BATCH_SIZE": "8", "MAX_INPUT_TOKENS": "3686", "MAX_TOTAL_TOKENS": "4096", } @@ -263,17 +263,13 @@ docker run -p 8080:80 \ --device=/dev/neuron5 \ --device=/dev/neuron6 \ --device=/dev/neuron7 \ - --device=/dev/neuron8 \ - --device=/dev/neuron9 \ - --device=/dev/neuron10 \ - --device=/dev/neuron11 \ - -e HF_BATCH_SIZE=4 \ + -e HF_BATCH_SIZE=8 \ -e HF_SEQUENCE_LENGTH=4096 \ -e HF_AUTO_CAST_TYPE="bf16" \ - -e HF_NUM_CORES=24 \ + -e HF_NUM_CORES=16 \ ghcr.io/huggingface/neuronx-tgi:latest \ --model-id deepseek-ai/DeepSeek-R1-Distill-Llama-70B \ - --max-batch-size 4 \ + --max-batch-size 8 \ --max-total-tokens 4096 ```