Skip to content

Commit

Permalink
Merge pull request #1150 from basetenlabs/bump-version-0.9.36
Browse files Browse the repository at this point in the history
Release 0.9.36
  • Loading branch information
bdubayah authored Sep 23, 2024
2 parents 71bb741 + b25a74a commit 8ae543c
Show file tree
Hide file tree
Showing 48 changed files with 1,627 additions and 1,097 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN apt-get update && /bin/bash /tmp/library-scripts/common-debian.sh "${INSTALL

USER vscode
RUN curl -sSL https://install.python-poetry.org | python
ENV PATH=/home/vscode/.poetry/bin:$PATH
ENV PATH="/home/vscode/.poetry/bin:$PATH"

USER root

Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN apt-get update && /bin/bash /tmp/library-scripts/common-debian.sh "${INSTALL

USER vscode
RUN curl -sSL https://install.python-poetry.org | python
ENV PATH=/home/vscode/.poetry/bin:$PATH
ENV PATH="/home/vscode/.poetry/bin:$PATH"

USER root

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- uses: ./.github/actions/setup-python/
- run: poetry install
- name: run tests
run: poetry run pytest -v --cov=truss -m 'not integration' --junitxml=report.xml
run: poetry run pytest --durations=0 -m 'not integration' --junitxml=report.xml
- name: Publish Test Report # Not sure how to display this in the UI for non PRs.
uses: mikepenz/action-junit-report@v4
if: always()
Expand Down
12 changes: 6 additions & 6 deletions docker/base_images/base_image.Dockerfile.jinja
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{% if use_gpu %}
FROM nvidia/cuda:12.2.2-base-ubuntu20.04
ENV CUDNN_VERSION=8.9.5.29
ENV CUDA=12.2
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
ENV CUDNN_VERSION="8.9.5.29"
ENV CUDA="12.2"
ENV LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"

RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
apt-get update && apt-get install -y --no-install-recommends \
Expand All @@ -21,8 +21,8 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
rm -rf /var/lib/apt/lists/*

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED="True"
ENV DEBIAN_FRONTEND="noninteractive"

RUN apt update && \
apt install -y bash \
Expand All @@ -49,7 +49,7 @@ FROM python:{{python_version}}
RUN apt update && apt install -y

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True
ENV PYTHONUNBUFFERED="True"
{% endif %}


Expand Down
858 changes: 437 additions & 421 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "truss"
version = "0.9.35"
version = "0.9.36"
description = "A seamless bridge from model development to model delivery"
license = "MIT"
readme = "README.md"
Expand Down
8 changes: 4 additions & 4 deletions truss-chains/examples/audio-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ whisper transcription model (which has slower deployment times), they are in
the current setup deployed separately:

```bash
truss chains deploy whisper_chainlet.py
truss chains push whisper_chainlet.py
```

Insert the predict URL for the Whisper Chainlet (printed by above deploy
Insert the predict URL for the Whisper Chainlet (printed by above push
command or can be found on the status page) as a value for
`WHISPER_PREDICT_URL` in `transcribe.py`. The deploy the transcribe chain.
`WHISPER_PREDICT_URL` in `transcribe.py`. Then push the transcribe chain.

```bash
truss chains deploy transcribe.py
truss chains push transcribe.py
```

An example local invocation of the chain is given in the main-section of
Expand Down
4 changes: 2 additions & 2 deletions truss-chains/examples/rag/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ python rag_chain.py
Deploy the Chain to production:

```sh
truss chains deploy rag_chain.py
truss chains push rag_chain.py
```

Note that this command will print an example cURL command showing how to
Expand All @@ -57,7 +57,7 @@ For example a chain invocation might look like this (you need to update the
URL):

```sh
curl -X POST 'https://model-5wo86nn3.api.baseten.co/development/predict' \
curl -X POST 'https://chain-<CHAIN_ID>.api.baseten.co/development/run_remote' \
-H "Authorization: Api-Key $BASETEN_API_KEY" \
-d '{"new_bio": "Sam just moved to Manhattan for his new job at a large bank. In college, he enjoyed building sets for student plays."}'
```
Expand Down
5 changes: 2 additions & 3 deletions truss-chains/truss_chains/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,8 @@ def _push_service(
else:
raise NotImplementedError(options)

logging.info(
f"Pushed `{chainlet_descriptor.display_name}` @ {service.predict_url}."
)
logging.info(f"Pushed `{chainlet_descriptor.display_name}`")
logging.debug(f"Internal model endpoint: `{service.predict_url}`")
return service


Expand Down
6 changes: 6 additions & 0 deletions truss/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import warnings
from pathlib import Path

from pydantic import PydanticDeprecatedSince20
from single_source import get_version

# Suppress Pydantic V1 warnings, because we have to use it for backwards compat.
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)


__version__ = get_version(__name__, Path(__file__).parent.parent)


Expand Down
2 changes: 1 addition & 1 deletion truss/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def _create_chains_table(service) -> Tuple[rich.table.Table, List[str]]:
@click.option(
"--publish/--no-publish",
type=bool,
default=True,
default=False,
help="Create chainlets as published deployments.",
)
@click.option(
Expand Down
18 changes: 17 additions & 1 deletion truss/config/trt_llm.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import json
import logging
import warnings
from enum import Enum
from typing import Optional

from pydantic import BaseModel, validator
from huggingface_hub.errors import HFValidationError
from huggingface_hub.utils import validate_repo_id
from pydantic import BaseModel, PydanticDeprecatedSince20, validator
from rich.console import Console

# Suppress Pydantic V1 warnings, because we have to use it for backwards compat.
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,6 +103,8 @@ def __init__(self, **data):
super().__init__(**data)
self._validate_minimum_required_configuration()
self._validate_kv_cache_flags()
if self.build.checkpoint_repository.source == CheckpointSource.HF:
self._validate_hf_repo_id()

# In pydantic v2 this would be `@model_validator(mode="after")` and
# the __init__ override can be removed.
Expand Down Expand Up @@ -131,6 +139,14 @@ def _validate_kv_cache_flags(self):
raise ValueError("Using fp8 context fmha requires paged context fmha")
return self

def _validate_hf_repo_id(self):
try:
validate_repo_id(self.build.checkpoint_repository.repo)
except HFValidationError as e:
raise ValueError(
f"HuggingFace repository validation failed: {str(e)}"
) from e

@property
def requires_build(self):
if self.build is not None:
Expand Down
2 changes: 1 addition & 1 deletion truss/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@

REGISTRY_BUILD_SECRET_PREFIX = "DOCKER_REGISTRY_"

TRTLLM_BASE_IMAGE = "baseten/briton-server:5fa9436e_v0.0.9"
TRTLLM_BASE_IMAGE = "baseten/briton-server:5fa9436e_v0.0.11"
TRTLLM_PYTHON_EXECUTABLE = "/usr/bin/python3"
BASE_TRTLLM_REQUIREMENTS = [
"grpcio==1.62.3",
Expand Down
4 changes: 2 additions & 2 deletions truss/contexts/image_builder/serving_image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from google.cloud import storage
from huggingface_hub import get_hf_file_metadata, hf_hub_url, list_repo_files
from huggingface_hub.utils import filter_repo_objects
from truss import constants
from truss.config.trt_llm import TrussTRTLLMModel
from truss.constants import (
AUDIO_MODEL_TRTLLM_REQUIREMENTS,
Expand Down Expand Up @@ -70,7 +71,6 @@
GCS_CREDENTIALS = "service_account.json"
S3_CREDENTIALS = "s3_credentials.json"

HF_ACCESS_TOKEN_SECRET_NAME = "hf_access_token"
HF_ACCESS_TOKEN_FILE_NAME = "hf-access-token"

CLOUD_BUCKET_CACHE = Path("/app/model_cache/")
Expand Down Expand Up @@ -526,7 +526,7 @@ def _render_dockerfile(
build_dir / USER_SUPPLIED_REQUIREMENTS_TXT_FILENAME
)

hf_access_token = config.secrets.get(HF_ACCESS_TOKEN_SECRET_NAME)
hf_access_token = config.secrets.get(constants.HF_ACCESS_TOKEN_KEY)
dockerfile_contents = dockerfile_template.render(
should_install_server_requirements=should_install_server_requirements,
base_image_name_and_tag=base_image_name_and_tag,
Expand Down
4 changes: 4 additions & 0 deletions truss/remote/baseten/service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import enum
import time
import urllib.parse
import warnings
from typing import (
Any,
Dict,
Expand All @@ -17,6 +18,9 @@
from truss.truss_handle import TrussHandle
from truss.util.errors import RemoteNetworkError

# "classes created inside an enum will not become a member" -> intended here anyway.
warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*enum.*")

DEFAULT_STREAM_ENCODING = "utf-8"


Expand Down
7 changes: 4 additions & 3 deletions truss/remote/remote_factory.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import inspect

try:
from configparser import DEFAULTSECT, ConfigParser # type: ignore
except ImportError:
# We need to do this for old python.
from configparser import DEFAULTSECT
from configparser import SafeConfigParser as ConfigParser
except ImportError:
# We need to do this for py312 and onwards.
from configparser import DEFAULTSECT, ConfigParser # type: ignore


from functools import partial
from operator import is_not
Expand Down
8 changes: 4 additions & 4 deletions truss/templates/base.Dockerfile.jinja
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG PYVERSION={{config.python_version}}
FROM {{base_image_name_and_tag}} as truss_server
FROM {{base_image_name_and_tag}} AS truss_server

ENV PYTHON_EXECUTABLE {{ config.base_image.python_executable_path or 'python3' }}
ENV PYTHON_EXECUTABLE="{{ config.base_image.python_executable_path or 'python3' }}"

{% block fail_fast %}
RUN grep -w 'ID=debian\|ID_LIKE=debian' /etc/os-release || { echo "ERROR: Supplied base image is not a debian image"; exit 1; }
Expand Down Expand Up @@ -52,7 +52,7 @@ RUN pip install -r {{config_requirements_filename}} --no-cache-dir && rm -rf /ro



ENV APP_HOME /app
ENV APP_HOME="/app"
WORKDIR $APP_HOME


Expand All @@ -68,7 +68,7 @@ COPY ./{{config.bundled_packages_dir}} /packages


{% for env_var_name, env_var_value in config.environment_variables.items() %}
ENV {{ env_var_name }} {{ env_var_value }}
ENV {{ env_var_name }}="{{ env_var_value }}"
{% endfor %}

{% block run %}
Expand Down
6 changes: 3 additions & 3 deletions truss/templates/cache.Dockerfile.jinja
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
FROM python:3.11-slim as cache_warmer
FROM python:3.11-slim AS cache_warmer

RUN mkdir -p /app/model_cache
WORKDIR /app

{% if hf_access_token %}
ENV HUGGING_FACE_HUB_TOKEN {{hf_access_token}}
ENV HUGGING_FACE_HUB_TOKEN="{{hf_access_token}}"
{% endif %}

RUN apt-get -y update; apt-get -y install curl; curl -s https://baseten-public.s3.us-west-2.amazonaws.com/bin/b10cp-5fe8dc7da-linux-amd64 -o /app/b10cp; chmod +x /app/b10cp
ENV B10CP_PATH_TRUSS /app/b10cp
ENV B10CP_PATH_TRUSS="/app/b10cp"
COPY ./cache_requirements.txt /app/cache_requirements.txt
RUN pip install -r /app/cache_requirements.txt --no-cache-dir && rm -rf /root/.cache/pip
COPY ./cache_warmer.py /cache_warmer.py
Expand Down
2 changes: 1 addition & 1 deletion truss/templates/control/control/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run(self):
"inference_server_home": self._inf_serv_home,
"inference_server_process_args": [
self._python_executable_path,
f"{self._inf_serv_home}/inference_server.py",
f"{self._inf_serv_home}/main.py",
],
"control_server_host": "0.0.0.0",
"control_server_port": self._control_server_port,
Expand Down
16 changes: 8 additions & 8 deletions truss/templates/server.Dockerfile.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
{% block base_image_patch %}
# If user base image is supplied in config, apply build commands from truss base image
{% if config.base_image %}
ENV PYTHONUNBUFFERED True
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED="True"
ENV DEBIAN_FRONTEND="noninteractive"

RUN apt update && \
apt install -y bash \
Expand Down Expand Up @@ -90,14 +90,14 @@ COPY ./{{ config.model_module_dir }} /app/model

{% block run %}
{%- if config.live_reload %}
ENV HASH_TRUSS {{truss_hash}}
ENV CONTROL_SERVER_PORT 8080
ENV INFERENCE_SERVER_PORT 8090
ENV HASH_TRUSS="{{truss_hash}}"
ENV CONTROL_SERVER_PORT="8080"
ENV INFERENCE_SERVER_PORT="8090"
ENV SERVER_START_CMD="/control/.env/bin/python3 /control/control/server.py"
ENTRYPOINT ["/control/.env/bin/python3", "/control/control/server.py"]
{%- else %}
ENV INFERENCE_SERVER_PORT 8080
ENV SERVER_START_CMD="{{(config.base_image.python_executable_path or "python3") ~ " /app/inference_server.py"}}"
ENTRYPOINT ["{{config.base_image.python_executable_path or "python3"}}", "/app/inference_server.py"]
ENV INFERENCE_SERVER_PORT="8080"
ENV SERVER_START_CMD="{{(config.base_image.python_executable_path or "python3") ~ " /app/main.py"}}"
ENTRYPOINT ["{{config.base_image.python_executable_path or "python3"}}", "/app/main.py"]
{%- endif %}
{% endblock %}
Loading

0 comments on commit 8ae543c

Please sign in to comment.