build(deps): bump llama-cpp-python from 0.3.1 to 0.3.2 #95
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as shown in the GitHub Actions UI.
name: Build and Push

# Triggers: pushes to main, any pull request, and manual dispatch.
# Runs are skipped when every changed path matches "**.md".
on:
  push:
    branches: ["main"]
    paths-ignore:
      - "**.md"
  pull_request:
    paths-ignore:
      - "**.md"
  workflow_dispatch:

# Environment shared by every job and step in this workflow.
# Values are quoted because environment variables are always strings.
env:
  # Target image repository; image tags are appended by the build steps.
  QUAY_REPO: "quay.io/dtrifiro/llama-cpp-python-serving"
  CI: "true"
  DOCKER_BUILDKIT: "1"
  FORCE_COLOR: "1"
jobs:
  # Builds the default and CUDA images on every run; pushes all tags to Quay
  # only for non-pull-request runs on main.
  build-and-push:
    name: Build and Push Image
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: "Setup Docker Buildx"
        uses: docker/setup-buildx-action@v3

      # Computed once and exported through GITHUB_ENV so both images carry the
      # same date and revision tags. Recomputing the minute-resolution
      # timestamp in each build step lets it drift while the first image builds.
      - name: "Compute image tags"
        run: |
          image_date="$(date +%Y%m%d%H%M)"
          image_rev="$(git rev-parse --short HEAD)"
          {
            echo "IMAGE_DATE=${image_date}"
            echo "IMAGE_REV=${image_rev}"
          } >> "${GITHUB_ENV}"

      # Default image, built from ./Dockerfile; tagged latest, <rev> and <date>.
      - name: "Build"
        run: |
          docker build \
            -t "${QUAY_REPO}:latest" \
            -t "${QUAY_REPO}:${IMAGE_REV}" \
            -t "${QUAY_REPO}:${IMAGE_DATE}" \
            .

      # CUDA image, built from Dockerfile.cuda; same tags with a -cuda suffix.
      - name: "Build CUDA image"
        run: |
          docker build \
            -t "${QUAY_REPO}:latest-cuda" \
            -t "${QUAY_REPO}:${IMAGE_REV}-cuda" \
            -t "${QUAY_REPO}:${IMAGE_DATE}-cuda" \
            -f Dockerfile.cuda \
            .

      - name: "Push to quay"
        if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
        # Secrets are passed through the environment and the token is fed to
        # docker on stdin, so it is neither templated into the script text nor
        # visible in the process argument list.
        env:
          QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
          QUAY_TOKEN: ${{ secrets.QUAY_TOKEN }}
        run: |
          printf '%s' "${QUAY_TOKEN}" | docker login --username "${QUAY_USERNAME}" --password-stdin quay.io
          docker push --all-tags "${QUAY_REPO}"

  # Runs after build-and-push, under the same condition as the push step.
  # Sets up KServe via the setup-kserve action, then runs the repository's
  # setup, deploy and inference scripts.
  smoke-test:
    name: "Smoke test"
    needs: build-and-push
    if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: dtrifiro/setup-kserve@v0.0.2

      - name: Setup Model
        run: |
          bash scripts/setup_model.sh

      # NOTE(review): `cpu` presumably selects the non-CUDA runtime; confirm
      # against scripts/deploy.sh.
      - name: Deploy InferenceService/ServingRuntime
        run: |
          bash scripts/deploy.sh cpu

      - name: Perform sample inference
        run: |
          # The default run shell only sets -e; enable pipefail so a failing
          # kubectl is not masked by the exit status of cut.
          set -o pipefail
          # Strip the scheme from the predictor URL, keeping host[/path].
          ISVC_HOST=$(kubectl get isvc llama-cpp-python -o jsonpath='{.status.components.predictor.url}' | cut -d / -f 3-)
          # Fail early rather than append a host-less entry to /etc/hosts.
          if [ -z "${ISVC_HOST}" ]; then
            echo "InferenceService llama-cpp-python reported no predictor URL" >&2
            exit 1
          fi
          # Add the ISVC_HOST to /etc/hosts since we can't resolve it
          echo "127.0.0.1 ${ISVC_HOST}" | sudo tee -a /etc/hosts
          bash scripts/inference_simple.sh