# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
build:
  # set to true if your model requires a GPU
  gpu: true

  # a list of ubuntu apt packages to install
  # system_packages:
  #   - "libgl1-mesa-glx"
  #   - "libglib2.0-0"

  # python version in the form '3.11' or '3.11.4'
  python_version: "3.10.12"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - torch==2.0.1+cu118
    - --extra-index-url https://download.pytorch.org/whl/cu118
    - torchvision==0.15.2+cu118
    - --extra-index-url https://download.pytorch.org/whl/cu118
    - torchaudio==2.0.2+cu118
    - --extra-index-url https://download.pytorch.org/whl/cu118
    - safetensors
    - sentencepiece
    - psutil
- "https://github.com/jllllll/exllama/releases/download/0.0.14/exllama-0.0.14+cu118-cp310-cp310-linux_x86_64.whl"
  # commands run after the environment is set up
  run:
    - "wget https://huggingface.co/v2ray/xformers-wheels/resolve/main/xformers-torch2.0.1%2Bcu118-0.0.21.dev575-cp310-cp310-manylinux2014_x86_64.whl && pip3 install xformers-torch2.0.1+cu118-0.0.21.dev575-cp310-cp310-manylinux2014_x86_64.whl && rm -rf xformers-torch2.0.1+cu118-0.0.21.dev575-cp310-cp310-manylinux2014_x86_64.whl"
    - "wget https://huggingface.co/v2ray/xformers-wheels/resolve/main/flash_attn-2.0.2-cp310-cp310-linux_x86_64.whl && pip3 install flash_attn-2.0.2-cp310-cp310-linux_x86_64.whl && rm -rf flash_attn-2.0.2-cp310-cp310-linux_x86_64.whl"
    - "pip3 cache purge && apt-get clean"
    - "echo env ready."
# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"