Merge back #45

Closed — wants to merge 801 commits.

Commits (801)
0e97af4
Updating tokenizers. (#1517)
Narsil Feb 1, 2024
3ab578b
[docs] Fix link to Install CLI (#1526)
pcuenca Feb 2, 2024
0da00be
feat: add ie update to message docs (#1523)
drbh Feb 2, 2024
1734540
feat: use existing add_generation_prompt variable from config in temp…
drbh Feb 7, 2024
bd405e0
Impl simple mamba model (#1480)
drbh Feb 8, 2024
39af000
Update to peft 0.8.2 (#1537)
Stillerman Feb 8, 2024
09b7c26
feat(server): add frequency penalty (#1541)
OlivierDehaene Feb 8, 2024
c5ef81b
chore: bump ci rust version (#1543)
drbh Feb 9, 2024
a4e5801
ROCm AWQ support (#1514)
IlyasMoutawwakil Feb 9, 2024
5321463
feat(router): add max_batch_size (#1542)
OlivierDehaene Feb 9, 2024
0d794af
feat: experimental support for cuda graphs (#1428)
OlivierDehaene Feb 12, 2024
246ad39
feat: add deserialize_with that handles strings or objects with conte…
drbh Feb 13, 2024
6f68bb1
Fixing glibc version in the runtime. (#1556)
Narsil Feb 13, 2024
7671a41
Upgrade intermediary layer for nvidia too. (#1557)
Narsil Feb 13, 2024
d6b0fb9
Improving mamba runtime by using updates (#1552)
Narsil Feb 14, 2024
4c2848b
Small cleanup. (#1560)
Narsil Feb 14, 2024
cef0553
Outlines guided generation (#1539)
drbh Feb 15, 2024
c55abac
Added `name` field to OpenAI compatible API Messages (#1563)
amihalik Feb 15, 2024
142cdab
Bugfix: eos and bos tokens positions are inconsistent (#1567)
amihalik Feb 16, 2024
9946165
chore: add pre-commit (#1569)
OlivierDehaene Feb 16, 2024
0f2daad
feat: add chat template struct to avoid tuple ordering errors (#1570)
OlivierDehaene Feb 16, 2024
4139054
v1.4.1 (#1568)
OlivierDehaene Feb 16, 2024
d19c768
Fix mistral with length > window_size for long prefills (rotary doesn…
Narsil Feb 19, 2024
df23062
improve endpoint support (#1577)
drbh Feb 20, 2024
c9f4c1a
fix: refactor syntax to correctly include structs (#1580)
drbh Feb 20, 2024
fa8a8e0
fix(router): fix openapi and add jsonschema validation (#1578)
OlivierDehaene Feb 21, 2024
c86f58d
feat: add support for Gemma (#1583)
OlivierDehaene Feb 21, 2024
9c1cb81
v1.4.2 (#1585)
OlivierDehaene Feb 21, 2024
010508c
fix: fix openapi schema (#1586)
OlivierDehaene Feb 21, 2024
ac5a1c6
fix: avoid default message (#1579)
drbh Feb 22, 2024
bf700e7
Revamp medusa implementation so that every model can benefit. (#1588)
Narsil Feb 26, 2024
9b6db5f
Support tools (#1587)
drbh Feb 28, 2024
910d0a9
Fixing x-compute-time. (#1606)
Narsil Feb 28, 2024
97e2236
Fixing guidance docs. (#1607)
Narsil Feb 28, 2024
b40e833
feat: starcoder2 (#1605)
OlivierDehaene Feb 28, 2024
26cdea5
feat: Qwen2 (#1608)
OlivierDehaene Feb 28, 2024
e6bb3ff
v1.4.3 (#1609)
OlivierDehaene Feb 28, 2024
343aa7a
fix: Handle concurrent grammar requests (#1610)
drbh Feb 29, 2024
5a3903b
Fix idefics default. (#1614)
Narsil Feb 29, 2024
9ed4d2c
Fix async client timeout (#1617)
hugoabonizio Feb 29, 2024
3dd7da2
feat: accept legacy request format and response (#1527)
drbh Feb 29, 2024
7e08751
fix: add missing stop parameter for chat request (#1619)
drbh Mar 1, 2024
7dbaf9e
fix: correctly index into mask when applying grammar (#1618)
drbh Mar 1, 2024
d3711a6
Use a better model for the quick tour (#1639)
lewtun Mar 12, 2024
8a5bcba
Upgrade nix version from 0.27.1 to 0.28.0 (#1638)
yuanwu2017 Mar 12, 2024
0d9917f
Update peft + transformers + accelerate + bnb + safetensors (#1646)
abhishekkrthakur Mar 15, 2024
23fba67
Fix index in ChatCompletionChunk (#1648)
Wauplin Mar 16, 2024
0d72af5
Fixing minor typo in documentation: supported hardware section (#1632)
SachinVarghese Mar 18, 2024
dfbd9a3
feat: bump minijina and add test for core templates (#1626)
drbh Mar 20, 2024
6f15ac6
feat: support force downcast after FastRMSNorm multiply for Gemma (#1…
drbh Mar 21, 2024
4f09c80
fix: prefer spaces url over temp url (#1662)
drbh Mar 21, 2024
de6cb15
fix: improve tool type, bump pydantic and outlines (#1650)
drbh Mar 21, 2024
ed29d6e
Remove unecessary cuda graph. (#1664)
Narsil Mar 21, 2024
deb440b
Repair idefics integration tests. (#1663)
Narsil Mar 21, 2024
08e9181
feat: update client to 0.7 (#1667)
OlivierDehaene Mar 22, 2024
66914f7
fix: LlamaTokenizerFast to AutoTokenizer at flash_mistral.py (#1637)
SeongBeomLEE Mar 22, 2024
f171bdc
Inline images for multimodal models. (#1666)
Narsil Mar 22, 2024
1e9bcd9
feat: cohere (#1660)
OlivierDehaene Mar 22, 2024
6c4496a
v1.4.4 (#1668)
OlivierDehaene Mar 22, 2024
818aee3
fix: adjust logprob response logic (#1682)
drbh Mar 28, 2024
762dbf3
fix: handle batches with and without grammars (#1676)
drbh Mar 28, 2024
f04255c
feat: Add dbrx support (#1685)
OlivierDehaene Mar 29, 2024
4ee0a0c
v1.4.5 (#1686)
OlivierDehaene Mar 29, 2024
99874ea
Add cuda graphs sizes and make it default. (#1703)
Narsil Apr 4, 2024
c7e570e
Pickle conversion now requires `--trust-remote-code`. (#1704)
Narsil Apr 5, 2024
5062fda
Push users to streaming in the readme. (#1698)
Narsil Apr 5, 2024
f9958ee
Fixing cohere tokenizer. (#1697)
Narsil Apr 5, 2024
8dca3b0
Force weights_only (before fully breaking pickle files anyway). (#1710)
Narsil Apr 5, 2024
53c2c3d
Regenerate ld.so.cache (#1708)
oOraph Apr 8, 2024
ff42d33
Revert license to Apache 2.0 (#1714)
OlivierDehaene Apr 8, 2024
106d8ee
Automatic quantization config. (#1719)
Narsil Apr 9, 2024
4634b00
Adding Llava-Next (Llava 1.6) with full support. (#1709)
Narsil Apr 9, 2024
ad9d628
fix: fix CohereForAI/c4ai-command-r-plus (#1707)
OlivierDehaene Apr 10, 2024
30620a9
hotfix: mixtral
OlivierDehaene Apr 10, 2024
10d9083
Update libraries (#1713)
abhishekkrthakur Apr 11, 2024
b83aab9
Easier defaults for models stemmed from configs.
Narsil Apr 11, 2024
842f665
Revert "Easier defaults for models stemmed from configs."
Narsil Apr 11, 2024
c2fd35d
Dev/mask ldconfig output v2 (#1716)
oOraph Apr 11, 2024
408dbc4
Fp8 Support (#1726)
Narsil Apr 12, 2024
6c2c44b
Upgrade EETQ (Fixes the cuda graphs). (#1729)
Narsil Apr 12, 2024
c2c9872
fix(router): fix a possible deadlock in next_batch (#1731)
OlivierDehaene Apr 12, 2024
9d8f21c
chore(cargo-toml): apply lto fat and codegen-units of one (#1651)
somehowchris Apr 12, 2024
1b2670c
Improve the defaults for the launcher (#1727)
Narsil Apr 12, 2024
eefea5e
feat: medusa v2 (#1734)
OlivierDehaene Apr 12, 2024
275caa0
Fix typo in guidance.md (#1735)
eltociear Apr 12, 2024
c38a7d7
v2.0.0 (#1736)
OlivierDehaene Apr 12, 2024
88702d8
Fixing CI. (#1748)
Narsil Apr 15, 2024
7276d43
feat: improve tools to include name and add tests (#1693)
drbh Apr 16, 2024
00f3653
Update response type for `/v1/chat/completions` and `/v1/completions`…
Wauplin Apr 16, 2024
e4d31a4
fix: bump clients test base url to llama (#1751)
drbh Apr 16, 2024
06c3d4b
feat: accept list as prompt and use first string (#1702)
drbh Apr 17, 2024
f9ee2c4
Upgrading all versions. (#1759)
Narsil Apr 18, 2024
2d0a717
v2.0.1
OlivierDehaene Apr 18, 2024
26b3916
Make `--cuda-graphs` work as expected (bis) (#1768)
fxmarty Apr 22, 2024
ed72e92
fix typos in docs and add small clarifications (#1790)
MoritzLaurer Apr 22, 2024
455cada
Add attribute descriptions for `GenerateParameters` (#1798)
Wauplin Apr 23, 2024
9be1db3
feat: allow null eos and bos tokens in config (#1791)
drbh Apr 23, 2024
986b404
Phi3 support (#1797)
Narsil Apr 23, 2024
bfddfa5
Idefics2. (#1756)
Narsil Apr 23, 2024
23d82b8
fix: avoid frequency and repetition penalty on padding tokens (#1765)
drbh Apr 23, 2024
4c698fa
Adding support for `HF_HUB_OFFLINE` support in the router. (#1789)
Narsil Apr 23, 2024
0acac5c
feat: improve temperature logic in chat (#1749)
drbh Apr 25, 2024
fccf5ed
Updating the benchmarks so everyone uses openai compat layer. (#1800)
Narsil Apr 25, 2024
eb08b9f
Update guidance docs to reflect grammar support in API (#1775)
dr3s Apr 25, 2024
ee47973
Use the generation config. (#1808)
Narsil Apr 25, 2024
bbc547a
2nd round of benchmark modifications (tiny adjustements to avoid over…
Narsil Apr 26, 2024
f9cf345
Adding new env variables for TPU backends. (#1755)
Narsil Apr 26, 2024
45ecf9d
add intel xpu support for TGI (#1475)
sywangyi Apr 26, 2024
a8fd423
Blunder (#1815)
Narsil Apr 26, 2024
8b8e8f6
Fixing qwen2. (#1818)
Narsil Apr 26, 2024
e9f03f8
Dummy CI run. (#1817)
Narsil Apr 26, 2024
007d5e5
Changing the waiting_served_ratio default (stack more aggressively by…
Narsil Apr 28, 2024
eade737
Better graceful shutdown. (#1827)
Narsil Apr 29, 2024
f75c1a5
Prepare release.
Narsil Apr 30, 2024
51ee60d
Add the missing `tool_prompt` parameter to Python client (#1825)
maziyarpanahi Apr 30, 2024
04d4765
Small CI cleanup. (#1801)
Narsil Apr 30, 2024
743ecbc
Add reference to TPU support (#1760)
brandonroyal Apr 30, 2024
8332fc4
fix: use get_speculate to the number of layers (#1737)
OlivierDehaene Apr 30, 2024
f661508
feat: add how it works section (#1773)
drbh Apr 30, 2024
9192de5
Fixing frequency penalty (#1811)
martinigoyanes Apr 30, 2024
b2c9827
feat: add vlm docs and simple examples (#1812)
drbh Apr 30, 2024
c99ecd7
Handle images in chat api (#1828)
drbh Apr 30, 2024
b4ef038
chore: update torch (#1730)
OlivierDehaene Apr 30, 2024
dccab72
(chore): torch 2.3.0 (#1833)
Narsil Apr 30, 2024
6073ece
fix: split docs and start conceptual page (#1836)
drbh May 1, 2024
27b3a2c
Fix: "Fixing" double BOS for mistral too. (#1843)
Narsil May 1, 2024
0038e60
Adding scripts to prepare load data. (#1841)
Narsil May 1, 2024
de079d6
Remove misleading warning (not that important nowadays anyway). (#1848)
Narsil May 2, 2024
65539b7
feat: prefer huggingface_hub in docs and show image api (#1844)
drbh May 2, 2024
a257371
Updating Phi3 (long context). (#1849)
Narsil May 2, 2024
bb2b295
Add router name to /info endpoint (#1854)
Wauplin May 3, 2024
ac7076b
Upgrading to rust 1.78. (#1851)
Narsil May 6, 2024
59b3ffe
update xpu docker image and use public ipex whel (#1860)
sywangyi May 6, 2024
fd89d9d
Refactor layers. (#1866)
Narsil May 13, 2024
d348d2b
Granite support? (#1882)
Narsil May 13, 2024
3136f27
Add: Support for the Falcon2 11B architecture (#1886)
Nilabhra May 14, 2024
e3d7656
MLPSpeculator. (#1865)
Narsil May 14, 2024
33bc721
Fixing truncation. (#1890)
Narsil May 14, 2024
92f1338
Correct 'using guidance' link (#1892)
brandon-lockaby May 14, 2024
b5bc6e5
Add GPT-2 with flash attention (#1889)
danieldk May 15, 2024
a70b087
Removing accepted ids in the regular info logs, downgrade to debug. (…
Narsil May 15, 2024
a69ef52
feat: add deprecation warning to clients (#1855)
drbh May 15, 2024
6c715f8
[Bug Fix] Update torch import reference in bnb quantization (#1902)
DhruvSrikanth May 15, 2024
40213c9
Pali gemma modeling (#1895)
drbh May 16, 2024
d8402ea
OpenAI function calling compatible support (#1888)
phangiabao98 May 16, 2024
f5d4341
Fixing types. (#1906)
Narsil May 16, 2024
b3dd390
Types. (#1909)
Narsil May 16, 2024
3b5d93e
Fixing signals. (#1910)
Narsil May 16, 2024
a60fa84
Removing some unused code. (#1915)
Narsil May 17, 2024
232e8d5
MI300 compatibility (#1764)
fxmarty May 17, 2024
c4cf8b4
Add TGI monitoring guide through Grafana and Prometheus (#1908)
fxmarty May 17, 2024
422bf1f
Update grafana template (#1918)
fxmarty May 17, 2024
b5f1c9d
Fix TunableOp bug (#1920)
fxmarty May 17, 2024
5dad0c0
Fix TGI issues with ROCm (#1921)
fxmarty May 17, 2024
f871f11
Fixing the download strategy for ibm-fms (#1917)
Narsil May 18, 2024
293b812
ROCm: make CK FA2 default instead of Triton (#1924)
fxmarty May 20, 2024
904ff36
docs: Fix grafana dashboard url (#1925)
edwardzjl May 21, 2024
fc0eaff
feat: include token in client test like server tests (#1932)
drbh May 22, 2024
2f243a1
Creating doc automatically for supported models. (#1929)
Narsil May 22, 2024
efb73fc
fix: use path inside of speculator config (#1935)
drbh May 22, 2024
a103e3e
feat: add train medusa head tutorial (#1934)
drbh May 23, 2024
f41d644
reenable xpu for tgi (#1939)
sywangyi May 23, 2024
f4a073a
Fixing some legacy behavior (big swapout of serverless on legacy stuf…
Narsil May 23, 2024
629047c
Add completion route to client and add stop parameter where it's miss…
thomas-schillaci May 23, 2024
9546534
Improving the logging system. (#1938)
Narsil May 23, 2024
cff472b
Fixing codellama loads by using purely `AutoTokenizer`. (#1947)
Narsil May 24, 2024
d32e33b
Fix seeded output. (#1949)
Narsil May 24, 2024
9231098
Fix (flash) Gemma prefix and enable tests
danieldk May 24, 2024
a401c83
Fix GPTQ for models which do not have float16 at the default dtype (s…
danieldk May 27, 2024
0732b9d
Processor config chat template (#1954)
drbh May 27, 2024
b7ffa28
fix small typo and broken link (#1958)
MoritzLaurer May 27, 2024
e76b982
Upgrade to Axum 0.7 and Hyper 1.0 (Breaking change: disabled ngrok tu…
Narsil May 28, 2024
f20463e
Fix (non-container) pytest stdout buffering-related lock-up
danieldk May 28, 2024
612bc48
Fixing the text part from tokenizer endpoint. (#1967)
Narsil May 28, 2024
cbced7f
feat: adjust attn weight loading logic (#1975)
drbh May 29, 2024
36dd160
Add support for exl2 quantization
danieldk May 28, 2024
967ced2
Gemma GPTQ checks: skip logprob checks
danieldk May 30, 2024
659bd67
Update documentation version to 2.0.4 (#1980)
fxmarty May 31, 2024
06edde9
Purely refactors paged/attention into `layers/attention` and make har…
Narsil May 31, 2024
5ab4cef
Fixing exl2 scratch buffer. (#1990)
Narsil May 31, 2024
08b3eac
single char ` addition for docs (#1989)
nbroad1881 May 31, 2024
79402fb
Rest API to download lora adapter on router
tjluyao May 31, 2024
799a193
Fixing Phi3.
Narsil Jun 1, 2024
650c743
directly merge from tgi
rainj-me Jun 2, 2024
7243638
fix the lora-id parameter in the benchmark
rainj-me Jun 2, 2024
f125e73
Merge pull request #23 from mlsys-io/reorder-codebase
tjluyao Jun 2, 2024
40a70bc
Update README.md
tjluyao Jun 2, 2024
47f4685
add placeholder for flashinfer phi modeling (#24)
ag-flex-2024 Jun 2, 2024
e7fb9b9
integrate lora into mistral
PeterYaoNYU Jun 3, 2024
72d74cf
Update Makefile to include punica kernels
tjluyao Jun 3, 2024
e6af233
Integrate qwen2
NovTi Jun 3, 2024
80d4a60
Fix minor typos
NovTi Jun 4, 2024
48b5053
testing llama-3-70b-gptq
tjluyao Jun 4, 2024
5935cce
add lora functions to python client; test llama-3-70b AWQ
tjluyao Jun 5, 2024
482ef98
Add qwen2 1.8b and 72b base inference
NovTi Jun 5, 2024
7dda533
Support Flashinfer based Phi2 and Phi3 models (#26)
ag-flex-2024 Jun 8, 2024
3956e46
Refactor the Flashinfer models (#27)
ag-flex-2024 Jun 9, 2024
a814437
Introduce the flashinfer attention wrapper abstraction and use it for…
ag-flex-2024 Jun 10, 2024
d58a35e
Compliant for pre-commit configs
tjluyao Jun 10, 2024
4757af8
kv.run test workflow
tjluyao Jun 10, 2024
9ec483d
kv.run test workflows (#29)
tjluyao Jun 10, 2024
9dd3b75
Kv.run test workflows (#30)
tjluyao Jun 11, 2024
6c96fdd
Llama rewrite (#31)
ag-flex-2024 Jun 11, 2024
e0cd4a6
reformat the llama files (#32)
ag-flex-2024 Jun 11, 2024
b599cc6
Decouple flashinfer code paths from flash attention library dependenc…
ag-flex-2024 Jun 11, 2024
6010fad
critical output bug (#25)
tjluyao Jun 11, 2024
b7c8735
minor typo
tjluyao Jun 11, 2024
b821d68
bug fix in layers/__init__.py
tjluyao Jun 11, 2024
8ae802c
fix dtype bugs in flashinfer model def
tjluyao Jun 11, 2024
e61ea77
minor fixes and rename tests.xml
tjluyao Jun 13, 2024
c7613eb
test docker (#34)
tjluyao Jun 14, 2024
e263ba8
fix warm up issue
alfredgui2 Jun 14, 2024
a4802b7
docker build workflow; remove submodules (#35)
tjluyao Jun 14, 2024
e49f754
remove tgi build workflow
tjluyao Jun 14, 2024
e8f9ff4
docker workflow
tjluyao Jun 14, 2024
66d2723
docker workflow
tjluyao Jun 14, 2024
83fc271
build workflow update
tjluyao Jun 14, 2024
de58365
fix in workflow
tjluyao Jun 14, 2024
85f34cb
Merge pull request #36 from mlsys-io/fix_warm
alfredgui2 Jun 14, 2024
93edec5
dependency and rust toolchain fix
tjluyao Jun 14, 2024
fa2f2f2
Merge branch 'master' of github.com:mlsys-io/kv.run
tjluyao Jun 14, 2024
b8a4785
finalize docker build workflow
tjluyao Jun 14, 2024
868d3f2
minor router-server fix
tjluyao Jun 15, 2024
ad40a17
flash attn rotary
alfredgui2 Jun 16, 2024
6c4fa6e
minor fixes
tjluyao Jun 16, 2024
7a93d84
update to rust 1.79
tjluyao Jun 16, 2024
e0feabb
fix rotary bug
alfredgui2 Jun 17, 2024
da84f6b
fixes
alfredgui2 Jun 17, 2024
1e2bf10
fix the flashinfer adapter
alfredgui2 Jun 17, 2024
31ad6bd
merge master
alfredgui2 Jun 17, 2024
b45e896
fix phi2 and phi3 modeling
alfredgui2 Jun 17, 2024
7dfa57d
empty
alfredgui2 Jun 17, 2024
c51e36e
fix lint
alfredgui2 Jun 17, 2024
08fde0f
revert test file
alfredgui2 Jun 17, 2024
6aaab88
Merge pull request #38 from mlsys-io/flash_attn_rotary
alfredgui2 Jun 17, 2024
0ba0ac9
minor fix in output example
tjluyao Jun 17, 2024
4a40c64
Merge branch 'master' of github.com:mlsys-io/kv.run
tjluyao Jun 17, 2024
f0d3664
adjust the flashinfer llama model to accomodate baichuan
alfredgui2 Jun 20, 2024
9b3c098
Merge pull request #40 from mlsys-io/add_baichuan
alfredgui2 Jun 20, 2024
2311872
decouple flashinfer files from flash attention (#41)
alfredgui2 Jun 22, 2024
8d3dd48
Fix the server CLI issue with use_flashinfer flag (#42)
alfredgui2 Jun 24, 2024
fa213e2
update FlashinferAttentionWrapper to flashinfer 0.0.6
MichaelYuan2 Jun 25, 2024
9da076d
minor fix in makefile
tjluyao Jul 1, 2024
4edacd5
update submodules
tjluyao Jul 1, 2024
d099bbb
update submodules
tjluyao Jul 1, 2024
9fafffc
update mistral flashinfer
PeterYaoNYU Jul 1, 2024
b9838c5
Add ChatGLM and refactor Qwen2
NovTi Jul 1, 2024
466b0a6
Add the batch concatenation functionality for flashinfer server (#43)
alfredgui2 Jul 2, 2024
f355733
bug fixes
tjluyao Jul 6, 2024
6adf978
Fix the decoding logic in test_local_grpc.py (#44)
alfredgui2 Jul 6, 2024
Diff view
4 changes: 4 additions & 0 deletions .dockerignore
@@ -0,0 +1,4 @@
aml
target
server/transformers
server/flash-attention
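The four entries above keep heavy build artifacts and vendored sources out of the Docker build context. A minimal Python sketch of which paths they exclude — using `fnmatch` as a rough stand-in for Docker's matcher, which differs in edge cases:

```python
# Minimal sketch (not Docker's exact matcher): check candidate paths
# against the four .dockerignore entries added in this diff.
from fnmatch import fnmatch

IGNORE_PATTERNS = ["aml", "target", "server/transformers", "server/flash-attention"]

def is_ignored(path: str) -> bool:
    """Return True if `path` or one of its parent directories matches a pattern."""
    parts = path.split("/")
    prefixes = ["/".join(parts[: i + 1]) for i in range(len(parts))]
    return any(fnmatch(prefix, pat) for prefix in prefixes for pat in IGNORE_PATTERNS)

for p in ["target/release/launcher", "server/flash-attention/setup.py", "router/src/main.rs"]:
    print(p, is_ignored(p))
```

Excluding `target` and the vendored `server/*` checkouts keeps image rebuilds from invalidating the cache whenever local build output changes.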
67 changes: 67 additions & 0 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,67 @@
name: "\U0001F41B Bug Report"
description: Submit a bug report to help us improve text-generation-inference
body:
  - type: textarea
    id: system-info
    attributes:
      label: System Info
      description: |
        Please share your system info with us (`text-generation-launcher --env` if installed locally).
        The full command line used that causes issues:
        OS version:
        Rust version (if self-compiling, `cargo version`):
        Model being used (`curl 127.0.0.1:8080/info | jq`):
        If local model please explicit the kind of model and/or equivalents.
        Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`):
        Deployment specificities (Kubernetes, EKS, AKS, any particular deployments):
        The current version being used:

      placeholder: text-generation-inference version, platform, python version, ...
    validations:
      required: true

  - type: checkboxes
    id: information-scripts-examples
    attributes:
      label: Information
      description: 'The problem arises when using:'
      options:
        - label: "Docker"
        - label: "The CLI directly"

  - type: checkboxes
    id: information-tasks
    attributes:
      label: Tasks
      description: "The thing I am working on is:"
      options:
        - label: "An officially supported command"
        - label: "My own modifications"

  - type: textarea
    id: reproduction
    validations:
      required: true
    attributes:
      label: Reproduction
      description: |
        Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
        If you have code snippets, error messages, stack traces please provide them here as well.
        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
        Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.

      placeholder: |
        Steps to reproduce the behavior:

        1.
        2.
        3.

  - type: textarea
    id: expected-behavior
    validations:
      required: true
    attributes:
      label: Expected behavior
      description: "A clear and concise description of what you would expect to happen."
2 changes: 2 additions & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,2 @@
blank_issues_enabled: true
version: 2.1
31 changes: 31 additions & 0 deletions .github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,31 @@
name: "\U0001F680 Feature request"
description: Submit a proposal/request for a new text-generation-inference feature
labels: [ "feature" ]
body:
  - type: textarea
    id: feature-request
    validations:
      required: true
    attributes:
      label: Feature request
      description: |
        A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist.

  - type: textarea
    id: motivation
    validations:
      required: true
    attributes:
      label: Motivation
      description: |
        Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.

  - type: textarea
    id: contribution
    validations:
      required: true
    attributes:
      label: Your contribution
      description: |
        Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/text-generation-inference/blob/main/CONTRIBUTING.md)
31 changes: 31 additions & 0 deletions .github/ISSUE_TEMPLATE/new-model-addition.yml
@@ -0,0 +1,31 @@
name: "\U0001F31F New model addition"
description: Submit a proposal/request to implement a new model
labels: [ "New model" ]

body:
  - type: textarea
    id: description-request
    validations:
      required: true
    attributes:
      label: Model description
      description: |
        Put any and all important information relative to the model

  - type: checkboxes
    id: information-tasks
    attributes:
      label: Open source status
      description: |
        Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `transformers`.
      options:
        - label: "The model implementation is available"
        - label: "The model weights are available"

  - type: textarea
    id: additional-info
    attributes:
      label: Provide useful links for the implementation
      description: |
        Please provide information regarding the implementation, the weights, and the authors.
        Please mention the authors by @gh-username if you're aware of their usernames.
40 changes: 40 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,40 @@
# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet though.

Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution.

Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change.

Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one reviewed your PR after a week has passed, don't hesitate to post a new comment @-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link
to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the
[documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and
[here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the right person to tag with @


@OlivierDehaene OR @Narsil

-->
20 changes: 20 additions & 0 deletions .github/workflows/autodocs.yml
@@ -0,0 +1,20 @@
name: Automatic Documentation for Launcher

on:
  pull_request:

jobs:
  update_docs:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Install Launcher
        id: install-launcher
        run: cargo install --path launcher/
      - name: Check launcher Docs are up-to-date
        run: |
          echo text-generation-launcher --help
          python update_doc.py --check
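The check step above relies on `update_doc.py` comparing the launcher's `--help` output with what is committed in the docs. A hedged sketch of that kind of comparison — the function name and normalization rules here are assumptions, not the real script:

```python
# Hypothetical sketch of a docs "--check" comparison like the one
# update_doc.py performs; function name and normalization are
# assumptions, not the actual implementation.
def docs_up_to_date(generated: str, committed: str) -> bool:
    """Compare generated help text with the committed docs section,
    ignoring trailing whitespace and surrounding blank lines."""
    def norm(s):
        return "\n".join(line.rstrip() for line in s.splitlines()).strip()
    return norm(generated) == norm(committed)

generated = "Usage: text-generation-launcher [OPTIONS]\n  --port <PORT>\n"
committed = "Usage: text-generation-launcher [OPTIONS]\n  --port <PORT>   \n"
print(docs_up_to_date(generated, committed))
```

Running such a check on every pull request fails CI whenever a launcher flag changes without a matching docs update.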
100 changes: 100 additions & 0 deletions .github/workflows/build-kvrun.yml
@@ -0,0 +1,100 @@
name: Build and push kv.run docker image

on:
  workflow_dispatch:

jobs:
  build-and-push-image:
    runs-on: [self-hosted, Linux, X64]

    concurrency:
      group: ${{ github.workflow }}-build-and-push-image-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true

    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=auto
          images: |
            ghcr.io/${{env.GITHUB_REPOSITORY}}
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile_kvrun
          push: true
          platforms: 'linux/amd64'
          build-args: |
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          #cache-from: type=gha
          #cache-to: type=gha,mode=max

#  integration-tests:
#    runs-on: [self-hosted, Linux, X64]
#
#    concurrency:
#      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
#      cancel-in-progress: true
#
#    needs:
#      - build-and-push-image # Wait for the docker image to be built
#
#    env:
#      DOCKER_VOLUME: /cache
#
#    steps:
#      - uses: actions/checkout@v2
#      - name: Inject slug/short variables
#        uses: rlespinasse/github-slug-action@v4.4.1
#      - name: Set up Python
#        uses: actions/setup-python@v4
#        with:
#          python-version: 3.10.14
#      - name: Prepare disks
#        run: |
#          sudo mkfs -t ext4 /dev/nvme1n1
#          sudo mkdir ${{ env.DOCKER_VOLUME }}
#          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
#      - name: Install
#        run: |
#          make install-integration-tests
#      - name: Run tests
#        run: |
#          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
#          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
#          pytest -s -vv integration-tests
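The metadata-action step in this workflow derives image tags from the listed patterns. A rough Python illustration of the tags a push would receive under simplified semantics — the helper below is an assumption for clarity, not the action's actual implementation:

```python
# Rough illustration (simplified semantics, not docker/metadata-action
# itself) of the tag patterns declared in the workflow above.
def docker_tags(version, sha_short, on_default_branch):
    """Return the image tags produced for a given push."""
    tags = []
    if version:  # type=semver,pattern={{version}} and {{major}}.{{minor}}
        major, minor, _patch = version.split(".")
        tags += [version, f"{major}.{minor}"]
    if on_default_branch:  # type=raw,value=latest,enable=<default-branch check>
        tags.append("latest")
    tags.append(f"sha-{sha_short}")  # type=raw,value=sha-<short sha>
    return tags

print(docker_tags("2.0.1", "abc1234", True))
```

Since this workflow only runs on `workflow_dispatch`, the `sha-` tag is the one most runs actually produce; the semver patterns only apply when a version tag is present.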
20 changes: 20 additions & 0 deletions .github/workflows/build_documentation.yml
@@ -0,0 +1,20 @@
name: Build documentation

on:
  push:
    paths:
      - "docs/source/**"
    branches:
      - main
      - doc-builder*
      - v*-release

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    with:
      commit_sha: ${{ github.sha }}
      package: text-generation-inference
      additional_args: --not_python_module
    secrets:
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
19 changes: 19 additions & 0 deletions .github/workflows/build_pr_documentation.yml
@@ -0,0 +1,19 @@
name: Build PR Documentation

on:
  pull_request:
    paths:
      - "docs/source/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
      package: text-generation-inference
      additional_args: --not_python_module
26 changes: 26 additions & 0 deletions .github/workflows/client-tests.yaml
@@ -0,0 +1,26 @@
name: Python Client Tests

on:
  pull_request:
    paths:
      - ".github/workflows/client-tests.yaml"
      - "clients/python/**"

jobs:
  run_tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.9
      - name: Install
        run: |
          cd clients/python && pip install .
      - name: Run tests
        run: |
          pip install pytest pytest-asyncio
          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          make python-client-tests
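The workflow above installs the Python client and runs its pytest-asyncio suite with a hub token from secrets. A sketch of the kind of async test it runs, with a stub client standing in so the pattern is runnable without network access — the real suite uses `text_generation.AsyncClient` against a served model:

```python
# Shape of an async client test like those the workflow runs; the stub
# client below is illustrative only, not the real text_generation API.
import asyncio
import os

class FakeAsyncClient:
    """Stand-in for an async generation client (illustrative only)."""
    def __init__(self, base_url, headers=None):
        self.base_url = base_url
        self.headers = headers or {}

    async def generate(self, prompt, max_new_tokens=4):
        await asyncio.sleep(0)  # pretend to await an HTTP round-trip
        return {"generated_text": prompt + " ..."}

async def test_generate():
    # Mirrors the workflow: the token is read from the environment.
    token = os.environ.get("HUGGING_FACE_HUB_TOKEN", "")
    headers = {"Authorization": f"Bearer {token}"} if token else None
    client = FakeAsyncClient("http://localhost:8080", headers=headers)
    out = await client.generate("test", max_new_tokens=4)
    assert out["generated_text"].startswith("test")

asyncio.run(test_generate())
```

Under pytest-asyncio the `asyncio.run` call would be replaced by marking the coroutine with `@pytest.mark.asyncio` and letting the plugin drive the event loop.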