From 3096a0e2e5fcf333bee4f23f2a435f65b4de0412 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Tue, 4 Jun 2024 19:39:58 -0400 Subject: [PATCH] Bump Jetstream, maxtext, jetstream-pytorch versions in Jetstream inference server guide (#695) * initial commit, working branch * bump to official maxtext version * revert server changes * remove 'slabe' images * revert checkpoint conversion changes * Remove jetstream install --- .../locust-docker/locust-tasks/tasks.py | 2 +- .../checkpoints/checkpoint_converter.sh | 6 +++--- .../inference-servers/jetstream/http-server/Dockerfile | 2 +- .../jetstream/http-server/http_server.py | 2 +- .../jetstream/maxtext/maxengine-server/Dockerfile | 10 ++-------- .../maxtext/single-host-inference/checkpoint-job.yaml | 2 +- .../maxtext/single-host-inference/deployment.yaml | 5 ++++- .../pytorch/jetstream-pytorch-server/Dockerfile | 8 +------- 8 files changed, 14 insertions(+), 23 deletions(-) diff --git a/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/tasks.py b/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/tasks.py index 5335e4207..cbc269f9d 100644 --- a/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/tasks.py +++ b/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/tasks.py @@ -333,7 +333,7 @@ class GrpcBenchmarkUser(GrpcUser): def grpc_infer(self): prompt = get_random_prompt(self) request = jetstream_pb2.DecodeRequest( - additional_text=prompt, + text_content=jetstream_pb2.DecodeRequest.TextContent(text=prompt), priority=0, max_tokens=model_params["max_output_len"], ) diff --git a/tutorials-and-examples/inference-servers/checkpoints/checkpoint_converter.sh b/tutorials-and-examples/inference-servers/checkpoints/checkpoint_converter.sh index aaa3a847b..c2c9a5f69 100644 --- a/tutorials-and-examples/inference-servers/checkpoints/checkpoint_converter.sh +++ 
b/tutorials-and-examples/inference-servers/checkpoints/checkpoint_converter.sh @@ -51,7 +51,7 @@ convert_maxtext_checkpoint() { MAXTEXT_VERSION=$5 if [ -z $MAXTEXT_VERSION ]; then - MAXTEXT_VERSION=jetstream-v0.2.0 + MAXTEXT_VERSION=jetstream-v0.2.2 fi git clone https://github.com/google/maxtext.git @@ -77,10 +77,10 @@ convert_pytorch_checkpoint() { OUTPUT_CKPT_DIR=$3 QUANTIZE=$4 PYTORCH_VERSION=$5 - JETSTREAM_VERSION=v0.2.0 + JETSTREAM_VERSION=v0.2.2 if [ -z $PYTORCH_VERSION ]; then - PYTORCH_VERSION=jetstream-v0.2.0 + PYTORCH_VERSION=jetstream-v0.2.2 fi CKPT_PATH="$(echo ${INPUT_CKPT_DIR} | awk -F'gs://' '{print $2}')" diff --git a/tutorials-and-examples/inference-servers/jetstream/http-server/Dockerfile b/tutorials-and-examples/inference-servers/jetstream/http-server/Dockerfile index 98d60a429..c9c08a642 100644 --- a/tutorials-and-examples/inference-servers/jetstream/http-server/Dockerfile +++ b/tutorials-and-examples/inference-servers/jetstream/http-server/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive -ENV JETSTREAM_VERSION=v0.2.0 +ENV JETSTREAM_VERSION=v0.2.2 RUN apt -y update && apt install -y --no-install-recommends \ ca-certificates \ diff --git a/tutorials-and-examples/inference-servers/jetstream/http-server/http_server.py b/tutorials-and-examples/inference-servers/jetstream/http-server/http_server.py index e5eeb0a2e..b70f2ed29 100644 --- a/tutorials-and-examples/inference-servers/jetstream/http-server/http_server.py +++ b/tutorials-and-examples/inference-servers/jetstream/http-server/http_server.py @@ -55,7 +55,7 @@ async def generate(request: GenerateRequest): try: request = jetstream_pb2.DecodeRequest( session_cache=request.session_cache, - additional_text=request.prompt, + text_content=jetstream_pb2.DecodeRequest.TextContent(text=request.prompt), priority=request.priority, max_tokens=request.max_tokens, ) diff --git a/tutorials-and-examples/inference-servers/jetstream/maxtext/maxengine-server/Dockerfile 
b/tutorials-and-examples/inference-servers/jetstream/maxtext/maxengine-server/Dockerfile index 04d709690..4960aaec8 100644 --- a/tutorials-and-examples/inference-servers/jetstream/maxtext/maxengine-server/Dockerfile +++ b/tutorials-and-examples/inference-servers/jetstream/maxtext/maxengine-server/Dockerfile @@ -4,8 +4,7 @@ FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive -ENV MAXTEXT_VERSION=jetstream-v0.2.0 -ENV JETSTREAM_VERSION=v0.2.0 +ENV MAXTEXT_VERSION=jetstream-v0.2.2 RUN apt -y update && apt install -y --no-install-recommends \ ca-certificates \ @@ -16,17 +15,12 @@ RUN apt -y update && apt install -y --no-install-recommends \ RUN update-alternatives --install \ /usr/bin/python3 python3 /usr/bin/python3.10 1 -RUN git clone https://github.com/google/maxtext.git && \ -git clone https://github.com/google/JetStream.git +RUN git clone https://github.com/google/maxtext.git RUN cd maxtext/ && \ git checkout ${MAXTEXT_VERSION} && \ bash setup.sh -RUN cd /JetStream && \ -git checkout ${JETSTREAM_VERSION} && \ -pip install -e . 
- COPY maxengine_server_entrypoint.sh /usr/bin/ RUN chmod +x /usr/bin/maxengine_server_entrypoint.sh diff --git a/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/checkpoint-job.yaml b/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/checkpoint-job.yaml index 4a72d9470..977adad1c 100644 --- a/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/checkpoint-job.yaml +++ b/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/checkpoint-job.yaml @@ -9,7 +9,7 @@ spec: restartPolicy: Never containers: - name: inference-checkpoint - image: us-docker.pkg.dev/cloud-tpu-images/inference/inference-checkpoint:v0.2.0 + image: us-docker.pkg.dev/cloud-tpu-images/inference/inference-checkpoint:v0.2.2 args: - -b=BUCKET_NAME - -m=google/gemma/maxtext/7b-it/2 diff --git a/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/deployment.yaml b/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/deployment.yaml index ed3b5bf88..f95e88dd4 100644 --- a/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/deployment.yaml +++ b/tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/deployment.yaml @@ -18,6 +18,7 @@ spec: containers: - name: maxengine-server image: us-docker.pkg.dev/cloud-tpu-images/inference/maxengine-server:v0.2.0 + imagePullPolicy: Always securityContext: privileged: true args: @@ -33,6 +34,7 @@ spec: - scan_layers=false - weight_dtype=bfloat16 - load_parameters_path=gs://BUCKET_NAME/final/unscanned/gemma_7b-it/0/checkpoints/0/items + - prometheus_port=9100 ports: - containerPort: 9000 resources: @@ -41,7 +43,8 @@ spec: limits: google.com/tpu: 8 - name: jetstream-http - image: us-docker.pkg.dev/cloud-tpu-images/inference/jetstream-http:v0.2.0 + image: us-docker.pkg.dev/cloud-tpu-images/inference/jetstream-http:v0.2.2 + imagePullPolicy: Always 
ports: - containerPort: 8000 --- diff --git a/tutorials-and-examples/inference-servers/jetstream/pytorch/jetstream-pytorch-server/Dockerfile b/tutorials-and-examples/inference-servers/jetstream/pytorch/jetstream-pytorch-server/Dockerfile index 75467ca30..81fcdffc9 100644 --- a/tutorials-and-examples/inference-servers/jetstream/pytorch/jetstream-pytorch-server/Dockerfile +++ b/tutorials-and-examples/inference-servers/jetstream/pytorch/jetstream-pytorch-server/Dockerfile @@ -4,8 +4,7 @@ FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive -ENV PYTORCH_JETSTREAM_VERSION=jetstream-v0.2.0 -ENV JETSTREAM_VERSION=v0.2.0 +ENV PYTORCH_JETSTREAM_VERSION=jetstream-v0.2.2 RUN apt -y update && apt install -y --no-install-recommends \ ca-certificates \ @@ -21,11 +20,6 @@ cd /jetstream-pytorch && \ git checkout ${PYTORCH_JETSTREAM_VERSION} && \ bash install_everything.sh -RUN git clone https://github.com/google/JetStream.git && \ -cd /JetStream && \ -git checkout ${JETSTREAM_VERSION} && \ -pip install -e . - ENV PYTHONPATH=$PYTHONPATH:$(pwd)/deps/xla/experimental/torch_xla2:$(pwd)/JetStream:$(pwd) COPY jetstream_pytorch_server_entrypoint.sh /usr/bin/