From 55bfc993cc27fd25ae5089d58ae822bfeca296a3 Mon Sep 17 00:00:00 2001
From: Ryan Mullins
Date: Thu, 22 Aug 2024 00:33:10 +0000
Subject: [PATCH] Adding gunicorn config for model_server.py

---
 lit_nlp/examples/gcp/Dockerfile               |  6 ++---
 lit_nlp/examples/gcp/README.md                |  7 ++++++
 .../gcp/{ model_server.py => model_server.py} |  7 ++----
 .../gcp/model_server_gunicorn_config.py       | 25 +++++++++++++++++++
 4 files changed, 37 insertions(+), 8 deletions(-)
 create mode 100644 lit_nlp/examples/gcp/README.md
 rename lit_nlp/examples/gcp/{ model_server.py => model_server.py} (92%)
 create mode 100644 lit_nlp/examples/gcp/model_server_gunicorn_config.py

diff --git a/lit_nlp/examples/gcp/Dockerfile b/lit_nlp/examples/gcp/Dockerfile
index dd4bd575..3f6a5536 100644
--- a/lit_nlp/examples/gcp/Dockerfile
+++ b/lit_nlp/examples/gcp/Dockerfile
@@ -73,7 +73,7 @@ RUN rm -rf /var/lib/apt/lists/*
 
 
 # ---- LIT on GCP from source ----
-FROM base AS lit-gcp-dev
+FROM base AS lit-gcp-model-server-dev
 ENV APP_HOME /app
 WORKDIR $APP_HOME
 
@@ -90,7 +90,7 @@ RUN echo "deb https://dl.yarnpkg.com/debian/ stable main" | \
 RUN apt update && apt -y install yarn
 
 # TODO(b/353980272): Replace the default config with the GCP-specific config
-COPY ./lit_nlp/examples/gunicorn_config.py ./
+COPY ./lit_nlp/examples/gcp/model_server_gunicorn_config.py ./
 
 # TODO(b/353980272): Replace this with a requirements file specific to the GCP
 # exmaple, this should include the core lit-nlp package.
@@ -109,4 +109,4 @@ RUN yarn && yarn build && rm -rf node_modules/*
 # TODO(b/353980272): Replace this with the GCP-specific config
 # See https://github.com/PAIR-code/lit/blob/main/Dockerfile
 WORKDIR $APP_HOME
-ENTRYPOINT ["gunicorn", "--config=gunicorn_config.py"]
+ENTRYPOINT ["gunicorn", "--config=model_server_gunicorn_config.py"]
diff --git a/lit_nlp/examples/gcp/README.md b/lit_nlp/examples/gcp/README.md
new file mode 100644
index 00000000..d2efba25
--- /dev/null
+++ b/lit_nlp/examples/gcp/README.md
@@ -0,0 +1,7 @@
+# Using LLMs in LIT on Google Cloud Platform
+
+Architectural Notes
+
+* The `LitApp` HTTP API assumes that inputs will be passed around as
+  identifiers and then reconstituted on the LitApp server before being sent to
+  the model. The `model_server.py` will not have direct access to the loaded Datasets, and thus the HTTP API assumes that the JSON data passed to its endpoints will be the complete, reconstituted examples from the `LitApp`. The `model_server.py` will send back predictions in full JSON format.
\ No newline at end of file
diff --git a/lit_nlp/examples/gcp/ model_server.py b/lit_nlp/examples/gcp/model_server.py
similarity index 92%
rename from lit_nlp/examples/gcp/ model_server.py
rename to lit_nlp/examples/gcp/model_server.py
index 3267a94a..3cc432e8 100644
--- a/lit_nlp/examples/gcp/ model_server.py
+++ b/lit_nlp/examples/gcp/model_server.py
@@ -5,24 +5,21 @@ import os
 from typing import Optional
 
 from absl import app
-from absl import flags
 from lit_nlp import dev_server
 from lit_nlp.examples.prompt_debugging import models as prompt_debugging_models
 from lit_nlp.lib import serialize
 from lit_nlp.lib import wsgi_app
 
-_FLAGS = flags.FLAGS
-
 DEFAULT_DL_FRAMEWORK = 'kerasnlp'
 DEFAULT_DL_RUNTIME = 'tensorflow'
 DEFAULT_PRECISION = 'bfloat16'
 DEFAULT_SEQUENCE_LENGTH = 512
 DEFAULT_BATCH_SIZE = 1
-DEFAULT_MODELS = 'gemma_1.1_instruct_2b_en:/cns/je-d/home/mattdangerw/keras/gemma/gemma_1.1_instruct_2b_en/3/'
+DEFAULT_MODELS = 'gemma_1.1_2b_IT:gemma_1.1_instruct_2b_en'
 
 
 def get_wsgi_app() -> wsgi_app.App:
-  """Return WSGI app for container-hosted demos."""
+  """Return WSGI app for an LLM server."""
 
   def wrap_handler(predict_fn):
     @functools.wraps(predict_fn)
diff --git a/lit_nlp/examples/gcp/model_server_gunicorn_config.py b/lit_nlp/examples/gcp/model_server_gunicorn_config.py
new file mode 100644
index 00000000..647a275e
--- /dev/null
+++ b/lit_nlp/examples/gcp/model_server_gunicorn_config.py
@@ -0,0 +1,25 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""gunicorn configuration for cloud-hosted demos."""
+
+import os
+
+_PORT = os.getenv('PORT', '5432')
+
+bind = f'0.0.0.0:{_PORT}'
+timeout = 3600
+threads = 8
+worker_class = 'gthread'
+wsgi_app = 'lit_nlp.examples.gcp.model_server:get_wsgi_app()'
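
For context on the new gunicorn config: gunicorn resolves the `wsgi_app` string `'lit_nlp.examples.gcp.model_server:get_wsgi_app()'` by importing the module and calling the named factory, so the container serves whatever `get_wsgi_app()` builds. Below is a minimal sketch of that wiring, assuming the `lit-nlp` package and the model dependencies are installed locally; running outside the Dockerfile's ENTRYPOINT is an assumption for illustration, not part of this patch.

```python
# Shell equivalent of the Dockerfile ENTRYPOINT, run from the repo root:
#   PORT=5432 gunicorn --config=lit_nlp/examples/gcp/model_server_gunicorn_config.py

from lit_nlp.examples.gcp import model_server

# gunicorn calls this factory because the config's `wsgi_app` string ends in
# "()"; the factory builds the WSGI app that serves DEFAULT_MODELS.
app = model_server.get_wsgi_app()
```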
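The README's architectural notes imply a simple request/response contract: the LitApp sends complete, reconstituted examples as JSON and receives predictions back as JSON. The sketch below illustrates only that shape; the `/predict` path, the `inputs` wrapper, and the `prompt` field are illustrative assumptions, not endpoints or field names defined by this patch.

```python
import json
import urllib.request

# A hypothetical, already-reconstituted example as the LitApp server would send
# it; the real field names depend on the loaded model's input spec.
examples = [{"prompt": "Summarize the architectural notes above."}]

request = urllib.request.Request(
    "http://localhost:5432/predict",  # port from the gunicorn config; path is assumed
    data=json.dumps({"inputs": examples}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(request) as response:
    # Per the README notes, predictions come back as complete JSON documents.
    predictions = json.loads(response.read())
    print(predictions)
```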