From 7eff0053e42b836e508cd2b91020d6ca09539f01 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 15 Dec 2020 15:01:20 -0800 Subject: [PATCH 01/11] Update docs --- dev/generate_cli_md.sh | 6 +- docs/aws/uninstall.md | 9 - docs/{workloads/cli.md => cli/commands.md} | 38 ++--- docs/cli/install.md | 44 +++++ docs/{workloads => cli}/python-client.md | 0 docs/{workloads => cli}/telemetry.md | 2 +- docs/cli/uninstall.md | 8 + docs/{guides => }/contributing.md | 0 docs/gcp/uninstall.md | 11 +- docs/guides/cli.md | 54 ------ docs/guides/docker-hub-rate-limiting.md | 186 --------------------- docs/guides/single-node-deployment.md | 117 ------------- docs/summary.md | 15 +- docs/workloads/environments.md | 98 ----------- 14 files changed, 87 insertions(+), 501 deletions(-) rename docs/{workloads/cli.md => cli/commands.md} (96%) create mode 100644 docs/cli/install.md rename docs/{workloads => cli}/python-client.md (100%) rename docs/{workloads => cli}/telemetry.md (95%) create mode 100644 docs/cli/uninstall.md rename docs/{guides => }/contributing.md (100%) delete mode 100644 docs/guides/cli.md delete mode 100644 docs/guides/docker-hub-rate-limiting.md delete mode 100644 docs/guides/single-node-deployment.md delete mode 100644 docs/workloads/environments.md diff --git a/dev/generate_cli_md.sh b/dev/generate_cli_md.sh index 70a673f8e1..f31c1a18d8 100755 --- a/dev/generate_cli_md.sh +++ b/dev/generate_cli_md.sh @@ -18,7 +18,7 @@ set -e ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null && pwd)" -out_file=$ROOT/docs/workloads/cli.md +out_file=$ROOT/docs/cli/commands.md rm -f $out_file echo "building cli ..." @@ -28,7 +28,7 @@ make --no-print-directory -C $ROOT cli cli_config_backup_path=$HOME/.cortex/cli-bak-$RANDOM.yaml mv $HOME/.cortex/cli.yaml $cli_config_backup_path -echo "# CLI reference" >> $out_file +echo "# Commands" >> $out_file echo "" >> $out_file echo '_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_' >> $out_file @@ -57,7 +57,7 @@ echo "running help commands ..." for cmd in "${commands[@]}"; do echo '' >> $out_file - echo "### ${cmd}" >> $out_file + echo "## ${cmd}" >> $out_file echo '' >> $out_file echo '```text' >> $out_file $ROOT/bin/cortex help ${cmd} >> $out_file diff --git a/docs/aws/uninstall.md b/docs/aws/uninstall.md index 17d86e9d9e..5a094e2de0 100644 --- a/docs/aws/uninstall.md +++ b/docs/aws/uninstall.md @@ -2,19 +2,10 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -## Spin down Cortex - ```bash -# spin down Cortex cortex cluster down - -# uninstall the CLI -pip uninstall cortex -rm -rf ~/.cortex ``` -If you modified your bash profile, you may wish to remove `source <(cortex completion bash)` from it (or remove `source <(cortex completion zsh)` for `zsh`). - ## Delete metadata and log groups Since you may wish to have access to your data after spinning down your cluster, Cortex's bucket and log groups are not automatically deleted when running `cortex cluster down`. 
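If you do want to clean these up, a rough sketch with the AWS CLI could look like the following — the bucket and log group names here are placeholders (the actual names depend on your cluster name and region), and the log group prefix is assumed to match your cluster name:

```bash
# find the bucket and log group that were created for your cluster (names are cluster-specific)
aws s3 ls | grep <cluster_name>
aws logs describe-log-groups --log-group-name-prefix <cluster_name> --region <region>

# delete the bucket and all of its contents (irreversible)
aws s3 rb s3://<cortex_bucket_name> --force

# delete the log group
aws logs delete-log-group --log-group-name <cluster_name> --region <region>
```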
diff --git a/docs/workloads/cli.md b/docs/cli/commands.md similarity index 96% rename from docs/workloads/cli.md rename to docs/cli/commands.md index 478b609f94..450aec8ce7 100644 --- a/docs/workloads/cli.md +++ b/docs/cli/commands.md @@ -1,8 +1,8 @@ -# CLI reference +# Commands _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -### deploy +## deploy ```text create or update apis @@ -18,7 +18,7 @@ Flags: -h, --help help for deploy ``` -### get +## get ```text get information about apis or jobs @@ -34,7 +34,7 @@ Flags: -h, --help help for get ``` -### logs +## logs ```text stream logs from an api @@ -47,7 +47,7 @@ Flags: -h, --help help for logs ``` -### patch +## patch ```text update API configuration for a deployed API @@ -62,7 +62,7 @@ Flags: -h, --help help for patch ``` -### refresh +## refresh ```text restart all replicas for an api (without downtime) @@ -77,7 +77,7 @@ Flags: -h, --help help for refresh ``` -### predict +## predict ```text make a prediction request using a json file @@ -90,7 +90,7 @@ Flags: -h, --help help for predict ``` -### delete +## delete ```text delete any kind of api or stop a batch job @@ -106,7 +106,7 @@ Flags: -h, --help help for delete ``` -### cluster up +## cluster up ```text spin up a cluster on aws @@ -125,7 +125,7 @@ Flags: -h, --help help for up ``` -### cluster info +## cluster info ```text get information about a cluster @@ -145,7 +145,7 @@ Flags: -h, --help help for info ``` -### cluster configure +## cluster configure ```text update a cluster's configuration @@ -164,7 +164,7 @@ Flags: -h, --help help for configure ``` -### cluster down +## cluster down ```text spin down a cluster @@ -182,7 +182,7 @@ Flags: -h, --help help for down ``` -### cluster export +## cluster export ```text download the code and configuration for APIs @@ -199,7 +199,7 @@ Flags: -h, --help help for export ``` -### env configure +## env configure ```text configure an environment @@ -216,7 +216,7 @@ Flags: -h, --help help for configure ``` -### env list +## env list ```text list all configured environments @@ -229,7 +229,7 @@ Flags: -h, --help help for list ``` -### env default +## env default ```text set the default environment @@ -241,7 +241,7 @@ Flags: -h, --help help for default ``` -### env delete +## env delete ```text delete an environment configuration @@ -253,7 +253,7 @@ Flags: -h, --help help for delete ``` -### version +## version ```text print the cli and cluster versions @@ -266,7 +266,7 @@ Flags: -h, --help help for version ``` -### completion +## completion ```text generate shell completion scripts diff --git a/docs/cli/install.md b/docs/cli/install.md new file mode 100644 index 0000000000..d72c54e2cc --- /dev/null +++ b/docs/cli/install.md @@ -0,0 +1,44 @@ +# Install + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +## Install with pip + +```bash +pip install cortex +``` + +## Install without the Python client + +```bash +# Replace `VERSION` with the complete CLI version +$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/vVERSION/get-cli.sh)" + +# Example command to download CLI version 0.18.1 (note the "v") +$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/v0.18.1/get-cli.sh)" +``` + +By default, the Cortex CLI is installed at `/usr/local/bin/cortex`. 
To install the executable elsewhere, export the `CORTEX_INSTALL_PATH` environment variable to your desired location before running the command above. + +By default, the Cortex CLI creates a directory at `~/.cortex/` and uses it to store environment configuration. To use a different directory, export the `CORTEX_CLI_CONFIG_DIR` environment variable before running a `cortex` command. + +## Environments + +By default, the CLI has a single environment named `local`. When you create a cluster with `cortex cluster up`, an environment named `aws` or `gcp` is automatically created to point to your cluster. You can name the environment something else via the `--configure-env` flag, e.g. `cortex cluster up --configure-env prod`. You can also use the `--configure-env` flag with `cortex cluster info` and `cortex cluster configure` to create / update the specified environment. + +### Example + +```bash +cortex deploy # uses local env; same as `cortex deploy --env local` +cortex logs my-api # uses local env; same as `cortex logs my-api --env local` +cortex delete my-api # uses local env; same as `cortex delete my-api --env local` + +cortex cluster up # configures the aws env; same as `cortex cluster up --configure-env aws` +cortex deploy --env aws +cortex deploy # uses local env; same as `cortex deploy --env local` + +# optional: change the default environment to aws +cortex env default aws # sets aws as the default env +cortex deploy # uses aws env; same as `cortex deploy --env aws` +cortex deploy --env local +``` diff --git a/docs/workloads/python-client.md b/docs/cli/python-client.md similarity index 100% rename from docs/workloads/python-client.md rename to docs/cli/python-client.md diff --git a/docs/workloads/telemetry.md b/docs/cli/telemetry.md similarity index 95% rename from docs/workloads/telemetry.md rename to docs/cli/telemetry.md index 0c9c3f4821..8d6858becd 100644 --- a/docs/workloads/telemetry.md +++ b/docs/cli/telemetry.md @@ -10,4 +10,4 @@ If telemetry is enabled, events and errors are collected. Each time you run a co ## How do I opt out? -If you'd like to disable telemetry, modify your `~/.cortex/cli.yaml` file (or create it if it doesn't exist) and add `telemetry: false`. +If you'd like to disable telemetry, modify your `~/.cortex/cli.yaml` file (or create it if it doesn't exist) and add `telemetry: false` before spinning up your cluster. diff --git a/docs/cli/uninstall.md b/docs/cli/uninstall.md new file mode 100644 index 0000000000..7faa3c382b --- /dev/null +++ b/docs/cli/uninstall.md @@ -0,0 +1,8 @@ +# Uninstall + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +```bash +pip uninstall cortex +rm -rf ~/.cortex +``` diff --git a/docs/guides/contributing.md b/docs/contributing.md similarity index 100% rename from docs/guides/contributing.md rename to docs/contributing.md diff --git a/docs/gcp/uninstall.md b/docs/gcp/uninstall.md index db5e91d267..2ed39a17e1 100644 --- a/docs/gcp/uninstall.md +++ b/docs/gcp/uninstall.md @@ -2,17 +2,8 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -## Spin down Cortex - ```bash -# spin down Cortex cortex cluster-gcp down - -# uninstall the CLI -pip uninstall cortex -rm -rf ~/.cortex ``` -If you modified your bash profile, you may wish to remove `source <(cortex completion bash)` from it (or remove `source <(cortex completion zsh)` for `zsh`). 
- -*Note: The `cortex cluster-gcp down` command doesn't wait for the cluster to come down. You can ensure that the cluster has been removed by checking the GKE console.* +The `cortex cluster-gcp down` command doesn't wait for the cluster to spin down. You can ensure that the cluster has spun down by checking the GKE console. diff --git a/docs/guides/cli.md b/docs/guides/cli.md deleted file mode 100644 index a990665788..0000000000 --- a/docs/guides/cli.md +++ /dev/null @@ -1,54 +0,0 @@ -# Installing the CLI - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## Install on Mac / Linux - -### Install the CLI with Python Client - -```bash -pip install cortex -``` - -### Install the CLI without Python Client - -```bash -# Replace `INSERT_CORTEX_VERSION` with the complete CLI version (e.g. 0.18.1): -$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/vINSERT_CORTEX_VERSION/get-cli.sh)" - -# For example to download CLI version 0.18.1 (Note the "v"): -$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/v0.18.1/get-cli.sh)" -``` - -By default, the Cortex CLI is installed at `/usr/local/bin/cortex`. To install the executable elsewhere, export the `CORTEX_INSTALL_PATH` environment variable to your desired location before running the command above. - -By default, the Cortex CLI creates a directory at `~/.cortex/` and uses it to store environment configuration. To use a different directory, export the `CORTEX_CLI_CONFIG_DIR` environment variable before running a `cortex` command. - - -## Install on Windows - -Requires an x64 system with Windows 10 of **Version 1903** or higher, with **Build 18362** or higher. - -### Step 1 - -Install and configure the WSL (Windows Subsystem for Linux) version 2 on your machine following [this installation guide](https://docs.microsoft.com/en-us/windows/wsl/install-win10). - -In our example, we assume the installation of the Ubuntu distribution. - -### Step 2 - -Install and configure the Docker Desktop Engine app to use WSL 2 as its backend by following the steps in the [Docker Desktop WSL 2 backend guide](https://docs.docker.com/docker-for-windows/wsl/). - -### Step 3 - -Run Ubuntu in the terminal on your Windows machine and right-click the window's bar and click on *Properties*: - -![step-3a](https://user-images.githubusercontent.com/26958764/96926494-493cdf80-14be-11eb-9fac-4c81e1fac55c.png) - -In the *Font* category, set the font to one of the following fonts: **SimSun-ExtB** (recommended), **MS Gothic**, or **NSimSun**. Choosing one of these fonts helps render all Unicode characters correctly. Once selected, click *Okay*. - -![step-3b](https://user-images.githubusercontent.com/26958764/96926763-adf83a00-14be-11eb-9584-4eff3faf2377.png) - -### Step 4 - -Within the Ubuntu terminal, install the Cortex CLI as you would on a Mac / Linux machine. diff --git a/docs/guides/docker-hub-rate-limiting.md b/docs/guides/docker-hub-rate-limiting.md deleted file mode 100644 index 378919686d..0000000000 --- a/docs/guides/docker-hub-rate-limiting.md +++ /dev/null @@ -1,186 +0,0 @@ -# Docker Hub rate limiting - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -*Note: This guide is only relevant for Cortex version `0.22.1` and earlier. Starting in version `0.23.0`, we've migrated from Docker Hub to Quay (which allows for unlimited image pulls for unauthenticated users). 
If you upgrade to version >= `0.23.0`, you can disregard this guide.* - -Docker Hub's [newly enforced rate-limiting policy](https://www.docker.com/increase-rate-limits) can negatively impact your cluster. This is much likelier to be an issue if you've set `subnet_visibility: private` in your cluster configuration file, since with private subnets, all requests from all nodes are routed through the NAT Gateway, and will therefore have the same IP address (docker imposes the rate limit per IP address). If you haven't specified `subnet_visibility` or have set `subnet_visibility: public`, this is less likely to be an issue for you, since each instance will have its own IP address. - -If you are affected by Docker Hub's rate limiting, your may encounter issues such as: - -* your APIs typically run as expected but new replicas (during scale up) or newly submitted batch jobs suddenly stop working for a period of time and then eventually they start working again -* you encounter scaling issues for Realtime APIs -* batch jobs are stuck in an in progress state for an unusually long period of time - -Follow these steps to determine if this issue is affecting your cluster: - -1. [Setup kubectl](./kubectl-setup.md) -2. `kubectl get pods --all-namespaces` -3. Check the pod status column for image pull failures such as `ErrImagePull`, `ImagePullBackoff`. If you don't see any, the rate limiting may not be affecting you currently. -4. Get the pod id and namespace of a pod encountering image pull failures -5. `kubectl describe pod --namespace ` -6. Under the events section, if you see error events related to docker hub rate limiting, then your cluster is likely affected by the rate limiting - -There are three ways to avoid this issue: - -## Use Cortex images from quay.io - -In response to Docker Hub's new image pull policy, we have migrated our images to [quay.io](https://quay.io). This registry allows for unlimited image pulls for unauthenticated users. - -It is possible to configure Cortex to use the images from Quay instead of Docker Hub: - -### Update your cluster configuration file - -Add the following to your [cluster configuration file](../aws/install.md) (e.g. `cluster.yaml`). In the image paths below, make sure to set `` to your cluster's version. - -```yaml -# cluster.yaml - -image_manager: quay.io/cortexlabs/manager: -image_operator: quay.io/cortexlabs/operator: -image_downloader: quay.io/cortexlabs/downloader: -image_request_monitor: quay.io/cortexlabs/request-monitor: -image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler: -image_metrics_server: quay.io/cortexlabs/metrics-server: -image_nvidia: quay.io/cortexlabs/nvidia: -image_inferentia: quay.io/cortexlabs/inferentia: -image_neuron_rtd: quay.io/cortexlabs/neuron-rtd: -image_fluentd: quay.io/cortexlabs/fluentd: -image_statsd: quay.io/cortexlabs/statsd: -image_istio_proxy: quay.io/cortexlabs/istio-proxy: -image_istio_pilot: quay.io/cortexlabs/istio-pilot: -``` - -For cluster version <= `0.20.0`, also add the following two images: - -```yaml -image_istio_galley: quay.io/cortexlabs/istio-galley: -image_istio_citadel: quay.io/cortexlabs/istio-citadel: -``` - -For Cortex cluster version < `0.16.0`, please upgrade your cluster to the latest version. If you upgrade to version >= `0.23.0`, you can disregard this guide. - -Once you've updated your cluster configuration file, you can spin up your cluster (e.g. `cortex cluster up --config cluster.yaml`). 
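Before or after switching to the Quay images, the diagnostic steps above can be condensed into a couple of commands to confirm whether image pulls are actually failing — a quick sketch, assuming `kubectl` is already configured for your cluster (the pod name and namespace are placeholders):

```bash
# look for pods that are failing to pull images
kubectl get pods --all-namespaces | grep -E "ErrImagePull|ImagePullBackOff"

# inspect the recent events of an affected pod
kubectl describe pod <pod_name> --namespace <namespace> | tail -n 20
```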
- -### Update your API configuration file(s) - -To configure your APIs to use the Quay images, you can update your [API configuration files](../workloads/realtime/configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the Docker Hub images are used for your predictors, so you will need to specify the Quay image paths for all of your APIs. - -Here is a list of available images (make sure to set `` to your cluster's version): - -```text -quay.io/cortexlabs/python-predictor-cpu: -quay.io/cortexlabs/python-predictor-gpu: -quay.io/cortexlabs/python-predictor-inf: -quay.io/cortexlabs/tensorflow-serving-cpu: -quay.io/cortexlabs/tensorflow-serving-gpu: -quay.io/cortexlabs/tensorflow-serving-inf: -quay.io/cortexlabs/tensorflow-predictor: -quay.io/cortexlabs/onnx-predictor-cpu: -quay.io/cortexlabs/onnx-predictor-gpu: -quay.io/cortexlabs/python-predictor-cpu-slim: -quay.io/cortexlabs/python-predictor-gpu-slim:-cuda10.0 -quay.io/cortexlabs/python-predictor-gpu-slim:-cuda10.1 -quay.io/cortexlabs/python-predictor-gpu-slim:-cuda10.2 -quay.io/cortexlabs/python-predictor-gpu-slim:-cuda11.0 -quay.io/cortexlabs/python-predictor-inf-slim: -quay.io/cortexlabs/tensorflow-predictor-slim: -quay.io/cortexlabs/onnx-predictor-cpu-slim: -quay.io/cortexlabs/onnx-predictor-gpu-slim: -``` - -## Paid Docker Hub subscription - -Another option is to pay for the Docker Hub subscription to remove the limit on the number of image pulls. Docker Hub's updated pricing model allows unlimited pulls on a _Pro_ subscription for individuals as described [here](https://www.docker.com/pricing). - -The advantage of this approach is that there's no need to do a `cortex cluster down`/`cortex cluster up` to authenticate with your Docker Hub account. - -By default, the Cortex cluster pulls the images as an anonymous user. To configure your Cortex cluster to pull the images as an authenticated user, follow these steps: - -### Step 1 - -Install and configure kubectl ([instructions](kubectl-setup.md)). 
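Before patching any service accounts, it may be worth a quick sanity check that `kubectl` is pointed at the right cluster:

```bash
# confirm the active context and that the cluster's nodes are reachable
kubectl config current-context
kubectl get nodes
```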
- -### Step 2 - -Set the following environment variables, replacing the placeholders with your docker username and password: - -```bash -DOCKER_USERNAME=*** -DOCKER_PASSWORD=*** -``` - -Run the following commands: - -```bash -kubectl create secret docker-registry registry-credentials \ - --namespace default \ - --docker-username=$DOCKER_USERNAME \ - --docker-password=$DOCKER_PASSWORD - -kubectl create secret docker-registry registry-credentials \ - --namespace kube-system \ - --docker-username=$DOCKER_USERNAME \ - --docker-password=$DOCKER_PASSWORD - -kubectl patch serviceaccount default --namespace default \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -kubectl patch serviceaccount operator --namespace default \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -kubectl patch serviceaccount fluentd --namespace default \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -kubectl patch serviceaccount default --namespace kube-system \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -kubectl patch serviceaccount cluster-autoscaler --namespace kube-system \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -kubectl patch serviceaccount metrics-server --namespace kube-system \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -# Only if you are using cortex version <= 0.20.0: -kubectl patch serviceaccount istio-cni --namespace kube-system \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" - -# Only if you are using Inferentia: -kubectl patch serviceaccount neuron-device-plugin --namespace kube-system \ - -p "{\"imagePullSecrets\": [{\"name\": \"registry-credentials\"}]}" -``` - -### Updating your credentials - -First remove your old docker credentials from the cluster: - -```bash -kubectl delete secret --namespace default registry-credentials -kubectl delete secret --namespace kube-system registry-credentials -``` - -Then repeat step 2 above with your updated credentials. - -### Removing your credentials - -To remove your docker credentials from the cluster, run the following commands: - -```bash -kubectl delete secret --namespace default registry-credentials -kubectl delete secret --namespace kube-system registry-credentials - -kubectl patch serviceaccount default --namespace default -p "{\"imagePullSecrets\": []}" -kubectl patch serviceaccount operator --namespace default -p "{\"imagePullSecrets\": []}" -kubectl patch serviceaccount fluentd --namespace default -p "{\"imagePullSecrets\": []}" -kubectl patch serviceaccount default --namespace kube-system -p "{\"imagePullSecrets\": []}" -kubectl patch serviceaccount cluster-autoscaler --namespace kube-system -p "{\"imagePullSecrets\": []}" -kubectl patch serviceaccount metrics-server --namespace kube-system -p "{\"imagePullSecrets\": []}" -# Only if you are using cortex version <= 0.20.0: -kubectl patch serviceaccount istio-cni --namespace kube-system -p "{\"imagePullSecrets\": []}" -# Only if you are using Inferentia: -kubectl patch serviceaccount neuron-device-plugin --namespace kube-system -p "{\"imagePullSecrets\": []}" -``` - -## Push to AWS ECR (Elastic Container Registry) - -You can also push the Cortex images to ECR on your AWS account, and pull from your ECR repository in your cluster. Follow [this guide](self-hosted-images.md) to do this. 
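As a rough illustration of that approach (the account ID, region, and Cortex version below are placeholders, and you would repeat this for each image your cluster and APIs use), mirroring a single image to ECR might look like:

```bash
# authenticate docker with your ECR registry
aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <account_id>.dkr.ecr.<region>.amazonaws.com

# create a repository, then pull, re-tag, and push one of the Cortex images
aws ecr create-repository --repository-name cortexlabs/python-predictor-cpu --region <region>
docker pull quay.io/cortexlabs/python-predictor-cpu:<cortex_version>
docker tag quay.io/cortexlabs/python-predictor-cpu:<cortex_version> <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/python-predictor-cpu:<cortex_version>
docker push <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/python-predictor-cpu:<cortex_version>
```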
diff --git a/docs/guides/single-node-deployment.md b/docs/guides/single-node-deployment.md deleted file mode 100644 index 9ab8e9f2eb..0000000000 --- a/docs/guides/single-node-deployment.md +++ /dev/null @@ -1,117 +0,0 @@ -# Single node deployment - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -You can use Cortex to deploy models on a single node. Deploying to a single node can be cheaper than spinning up a Cortex cluster with 1 worker node. It also may be useful for testing your model on a GPU if you don't have access to one locally. - -Deploying on a single node entails `ssh`ing into that instance and running Cortex locally. When using this approach, you won't get the the advantages of deploying to a cluster such as autoscaling, rolling updates, etc. - -We've included the step-by-step instructions for AWS, although the process should be similar for any other cloud provider. - -## AWS - -### Step 1 - -Navigate to the [EC2 dashboard](https://console.aws.amazon.com/ec2/home) in your AWS Web Console and click "Launch instance". - -![ec2 dashboard](https://user-images.githubusercontent.com/4365343/81063901-ad370180-8ea6-11ea-8cd8-f552911043a9.png) - -### Step 2 - -Choose a Linux based AMI instance. We recommend using "Ubuntu Server 18.04 LTS". If you plan to serve models on a GPU, we recommend using "Deep Learning AMI (Ubuntu 18.04)" because it comes with Docker Engine and NVIDIA pre-installed. - -![step 2](https://user-images.githubusercontent.com/4365343/81064199-41a16400-8ea7-11ea-8d69-ae4ead6bf0be.png) - -### Step 3 - -Choose your desired instance type (it should have enough CPU and Memory to run your model). Typically it is a good idea have at least 1 GB of memory to spare for your operating system and any other processes that you might want to run on the instance. To run most Cortex examples, an m5.large instance is sufficient. - -Selecting an appropriate GPU instance depends on the kind of GPU card you want. Different GPU instance families have different GPU cards (i.e. g4 family uses NVIDIA T4 while the p2 family uses NVIDIA K80). For typical GPU use cases, g4dn.xlarge is one of the cheaper instances that should be able to serve most large models, including deep learning models such as GPT-2. - -Once you've chosen your instance click "Next: Configure instance details". - -![step 3](https://user-images.githubusercontent.com/4365343/81065727-07859180-8eaa-11ea-9293-af89906e4c6a.png) - -### Step 4 - -To make things easy for testing and development, we should make sure that the EC2 instance has a public IP address. Then click "Next: Add Storage". - -![step 4](https://user-images.githubusercontent.com/4365343/81064806-5af6e000-8ea8-11ea-8e94-838fbea2710f.png) - -### Step 5 - -We recommend having at least 50 GB of storage to save your models to disk and to download the docker images needed to serve your model. Then click "Next: Add Tags". - -![step 5](https://user-images.githubusercontent.com/4365343/81078638-7cfa5d80-8ebc-11ea-820d-3baba690dbf8.png) - -### Step 6 - -Adding tags is optional. You can add tags to your instance to improve searchability. Then click "Next: Configure Security Group". - -### Step 7 - -Configure your security group to allow inbound traffic to the `local_port` number you specified in your `cortex.yaml` (the default is 8888 if not specified). Exposing this port allows you to make requests to your API but it also exposes it to the world so be careful. Then click "Next: Review and Launch". 
- -![step 7](https://user-images.githubusercontent.com/4365343/81065102-e2445380-8ea8-11ea-96e0-65676a0bafa8.png) - -### Step 8 - -Double check details such as instance type, CPU, Memory, and exposed ports. Then click "Launch". - -![step 8](https://user-images.githubusercontent.com/4365343/81065800-26842380-8eaa-11ea-9c73-60ba0586ba38.png) - -### Step 9 - -You will be prompted to select a key pair, which is used to connect to your instance. Choose a key pair that you have access to. If you don't have one, you can create one and it will be downloaded to your browser's downloads folder. Then click "Launch Instances". - -![step 9](https://user-images.githubusercontent.com/4365343/81074878-9d73e900-8eb7-11ea-9c03-79ffea902dee.png) - -### Step 10 - -Once your instance is running, follow the [relevant instructions](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstances.html) to connect to your instance. - -If you are using a Mac or a Linux based OS, these instructions can help you ssh into your instance: - -```bash -# make sure you have .ssh folder -$ mkdir -p ~/.ssh - -# if your key was downloaded to your Downloads folder, move it to your .ssh folder -$ mv ~/Downloads/cortex-node.pem ~/.ssh/ - -# modify your key's permissions -$ chmod 400 ~/.ssh/cortex-node.pem - -# get your instance's public DNS and then ssh into it -$ ssh -i "cortex-node.pem" ubuntu@ec2-3-235-100-162.compute-1.amazonaws.com -``` - -![step 10](https://user-images.githubusercontent.com/4365343/81078225-f180cc80-8ebb-11ea-81ae-5f5f0e76e623.png) - -### Step 11 - -Docker Engine needs to be installed on your instance before you can use Cortex. Skip to Step 12 if you are using the "Deep Learning AMI" because Docker Engine is already installed. Otherwise, follow these [instructions](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) to install Docker Engine. - -Once Docker Engine is installed, enable the Docker commands to be used without `sudo`: - -```bash -$ sudo groupadd docker; sudo gpasswd -a $USER docker - -# you must log out and back in for the permission changes to be effective -$ logout -``` - -If you have installed Docker correctly, you should be able to run docker commands such as `docker run hello-world` without running into permission issues or needing `sudo`. - -### Step 12 - -Install the Cortex CLI. - - -```bash -$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/master/get-cli.sh)" -``` - -### Step 13 - -You can now use Cortex to deploy your model. 
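For example, once your `cortex.yaml` and predictor implementation are on the instance (the API name below is just an example), a first deployment to the local environment might look like:

```bash
# deploy to the local environment on this instance
cortex deploy

# check the API's status and find its endpoint
cortex get
cortex get my-api

# stream logs while you test requests against the endpoint
cortex logs my-api
```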
diff --git a/docs/summary.md b/docs/summary.md index 8376f6e675..8a046bf8f3 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,7 +1,17 @@ # Table of contents * [Get started](tutorials/realtime.md) -* [Chat with us](https://gitter.im/cortexlabs/cortex) +* [Community](https://gitter.im/cortexlabs/cortex) +* [Contributing](contributing.md) + +## CLI + +* [Install](cli/install.md) +* [Commands](cli/commands.md) +* [Python client](cli/python-client.md) +* [Environments](cli/environments.md) +* [Telemetry](cli/telemetry.md) +* [Uninstall](cli/uninstall.md) ## Tutorials @@ -69,9 +79,6 @@ * [Exporting models](guides/exporting.md) * [Multi-model endpoints](guides/multi-model.md) * [View API metrics](guides/metrics.md) -* [Single node deployment](guides/single-node-deployment.md) * [Setting up kubectl](guides/kubectl.md) * [Self-hosted Docker images](guides/self-hosted-images.md) -* [Docker Hub rate limiting](guides/docker-hub-rate-limiting.md) * [Private docker registry](guides/private-docker.md) -* [Contributing](guides/contributing.md) diff --git a/docs/workloads/environments.md b/docs/workloads/environments.md deleted file mode 100644 index a21ec980d8..0000000000 --- a/docs/workloads/environments.md +++ /dev/null @@ -1,98 +0,0 @@ -# Environments - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -The `cortex` CLI can be used to deploy models locally and/or to any number of clusters. Environments are used to select which cluster to use for a `cortex` command. An environment contains the information required to connect to a cluster (e.g. AWS credentials and Cortex operator URL). - -## Example: `aws` only - -```bash -cortex cluster up # configures the aws env; same as `cortex cluster up --configure-env aws` -cortex env default aws # sets aws as the default env -cortex deploy # uses aws env; same as `cortex deploy --env aws` -cortex logs my-api # uses aws env; same as `cortex logs my-api --env aws` -cortex delete my-api # uses aws env; same as `cortex delete my-api --env aws` -``` - -## Example: `local` only - -```bash -cortex deploy # uses local env; same as `cortex deploy --env local` -cortex logs my-api # uses local env; same as `cortex logs my-api --env local` -cortex delete my-api # uses local env; same as `cortex delete my-api --env local` -``` - -## Example: `local` and `aws` - -```bash -cortex deploy # uses local env; same as `cortex deploy --env local` -cortex logs my-api # uses local env; same as `cortex logs my-api --env local` -cortex delete my-api # uses local env; same as `cortex delete my-api --env local` - -cortex cluster up # configures the aws env; same as `cortex cluster up --configure-env aws` -cortex deploy --env aws -cortex deploy # uses local env; same as `cortex deploy --env local` - -# optional: change the default environment to aws -cortex env default aws # sets aws as the default env -cortex deploy # uses aws env; same as `cortex deploy --env aws` -cortex deploy --env local -``` - -## Example: multiple clusters - -```bash -cortex cluster up --config cluster1.yaml --configure-env cluster1 # configures the cluster1 env -cortex cluster up --config cluster2.yaml --configure-env cluster2 # configures the cluster2 env - -cortex deploy --env cluster1 -cortex logs my-api --env cluster1 -cortex delete my-api --env cluster1 - -cortex deploy --env cluster2 -cortex logs my-api --env cluster2 -cortex delete my-api --env cluster2 -``` - -## Example: multiple clusters, if you omitted the `--configure-env` on 
`cortex cluster up` - -```bash -cortex cluster info --config cluster1.yaml --configure-env cluster1 # configures the cluster1 env -cortex cluster info --config cluster2.yaml --configure-env cluster2 # configures the cluster2 env - -cortex deploy --env cluster1 -cortex logs my-api --env cluster1 -cortex delete my-api --env cluster1 - -cortex deploy --env cluster2 -cortex logs my-api --env cluster2 -cortex delete my-api --env cluster2 -``` - -## Example: configure `cortex` CLI to connect to an existing cluster - -If you are installing the `cortex` CLI on a new computer, you can configure it to access an existing Cortex cluster. - -On the computer which already has the CLI configured, run: - -```bash -cortex env list -``` - -Take note of the environment name and operator endpoint of the desired environment. - -On your new machine, run: - -```bash -cortex env configure -``` - -This will prompt for the necessary configuration. Note that the AWS credentials that you use here do not need any IAM permissions attached. If you will be running any `cortex cluster` commands specify the preferred AWS credentials using cli flags `--aws-key AWS_ACCESS_KEY_ID --aws-secret AWS_SECRET_ACCESS_KEY`. See [IAM permissions](../aws/security.md#iam-permissions) for more details. - -## Environments overview - -By default, the CLI ships with a single environment named `local`. This is the default environment for all Cortex commands (other than `cortex cluster` commands), which means that APIs will be deployed locally by default. - -When you create a cluster with `cortex cluster up`, an environment named `aws` is automatically created to point to your new cluster. You can name the environment something else via the `--configure-env` flag, e.g. `cortex cluster up --configure-env prod`. You can also use the `--configure-env` flag with `cortex cluster info` and `cortex cluster configure` to create/update the specified environment. You may interact with your cluster by appending `--env aws` to your `cortex` commands, e.g. `cortex deploy --env aws`. - -You can list your environments with `cortex env list`, change the default environment with `cortex env default`, delete an environment with `cortex env delete`, and create/update an environment with `cortex env configure`. From 428bd4f64a93e82a96036b53fd569678554d5b6a Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 15 Dec 2020 15:22:16 -0800 Subject: [PATCH 02/11] Update docs --- docs/aws/inferentia.md | 8 +++----- docs/cli/environments.md | 0 docs/guides/exporting.md | 4 +--- docs/guides/multi-model.md | 3 +-- docs/guides/private-docker.md | 2 +- docs/guides/self-hosted-images.md | 4 ++-- docs/summary.md | 9 ++------- docs/troubleshooting/api-request-errors.md | 2 +- .../nvidia-container-runtime-not-found.md | 4 +--- docs/workloads/batch/endpoints.md | 2 -- docs/workloads/batch/predictors.md | 6 +++--- docs/workloads/python-packages.md | 2 +- docs/workloads/realtime/predictors.md | 6 +++--- docs/workloads/realtime/traffic-splitter.md | 2 +- 14 files changed, 20 insertions(+), 34 deletions(-) create mode 100644 docs/cli/environments.md diff --git a/docs/aws/inferentia.md b/docs/aws/inferentia.md index cfd56efaa8..57c36aaae9 100644 --- a/docs/aws/inferentia.md +++ b/docs/aws/inferentia.md @@ -2,8 +2,6 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -To use [Inferentia ASICs](https://aws.amazon.com/machine-learning/inferentia/): - 1. 
You may need to [request a limit increase](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) for running Inferentia instances. 1. Set the instance type to an AWS Inferentia instance (e.g. `inf1.xlarge`) when creating your Cortex cluster. 1. Set the `inf` field in the `compute` configuration for your API. One unit of `inf` corresponds to one Inferentia ASIC with 4 NeuronCores *(not the same thing as `cpu`)* and 8GB of cache memory *(not the same thing as `mem`)*. Fractional requests are not allowed. @@ -20,9 +18,9 @@ Each Inferentia ASIC comes with 4 NeuronCores and 8GB of cache memory. To better ### NeuronCore Groups -A [NeuronCore Group](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-NeuronCore-Group.md) (NCG) is a set of NeuronCores that is used to load and run a compiled model. NCGs exist to aggregate NeuronCores to improve hardware performance. Models can be shared within an NCG, but this would require the device driver to dynamically context switch between each model, which degrades performance. Therefore we've decided to only allow one model per NCG (unless you are using a [multi-model endpoint](../guides/multi-model.md), in which case there will be multiple models on a single NCG, and there will be context switching). +A [NeuronCore Group](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-NeuronCore-Group.md) (NCG) is a set of NeuronCores that is used to load and run a compiled model. NCGs exist to aggregate NeuronCores to improve hardware performance. Models can be shared within an NCG, but this would require the device driver to dynamically context switch between each model, which degrades performance. Therefore we've decided to only allow one model per NCG (unless you are using a multi-model endpoint, in which case there will be multiple models on a single NCG, and there will be context switching). -Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the [`processes_per_replica`](realtime-api/autoscaling.md#replica-parallelism) for Realtime APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). +Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the `processes_per_replica` for Realtime APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). For example, if your API requests 2 `inf` chips, there will be 8 NeuronCores available. If you set `processes_per_replica` to 1, there will be one copy of your model running on a single NCG of size 8 NeuronCores. If `processes_per_replica` is 2, there will be two copies of your model, each running on a separate NCG of size 4 NeuronCores. If `processes_per_replica` is 4, there will be 4 NCGs of size 2 NeuronCores, and if If `processes_per_replica` is 8, there will be 8 NCGs of size 1 NeuronCores. In this scenario, these are the only valid values for `processes_per_replica`. 
In other words the total number of requested NeuronCores (which equals 4 * the number of requested Inferentia chips) must be divisible by `processes_per_replica`. @@ -64,7 +62,7 @@ model_neuron = torch.neuron.trace( model_neuron.save(compiled_model) ``` -The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list](realtime-api/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batch-api/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. +The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list and Batch API pre-installed packages list. When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. A list of model compilation examples for Inferentia can be found on the [`aws/aws-neuron-sdk`](https://github.com/aws/aws-neuron-sdk) repo for [TensorFlow](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/) and for [PyTorch](https://github.com/aws/aws-neuron-sdk/blob/master/docs/pytorch-neuron/README.md). diff --git a/docs/cli/environments.md b/docs/cli/environments.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/guides/exporting.md b/docs/guides/exporting.md index b34e6c5b82..f83a52b056 100644 --- a/docs/guides/exporting.md +++ b/docs/guides/exporting.md @@ -16,8 +16,6 @@ The recommended approach is export your PyTorch model with [torch.save()](https: torch.save(model.state_dict(), "weights.pth") ``` -For Inferentia-equipped instances, check the [Inferentia instructions](inferentia.md#neuron). - ### ONNX It may also be possible to export your PyTorch model into the ONNX format using [torch.onnx.export()](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). For example: @@ -43,7 +41,7 @@ onnx.save(model, 'my_model.onnx') ### `SavedModel` -You may export your trained model into an export directory, or use a checkpoint directory containing the export directory (which is usually the case if you used `estimator.train_and_evaluate()`). The folder may be zipped if you desire. For Inferentia-equipped instances, also check the [Inferentia instructions](inferentia.md#neuron). +You may export your trained model into an export directory, or use a checkpoint directory containing the export directory (which is usually the case if you used `estimator.train_and_evaluate()`). The folder may be zipped if you desire. A TensorFlow `SavedModel` directory should have this structure: diff --git a/docs/guides/multi-model.md b/docs/guides/multi-model.md index 7ea950b1f7..8ebc6f5c63 100644 --- a/docs/guides/multi-model.md +++ b/docs/guides/multi-model.md @@ -2,8 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -It is possible to serve multiple models in the same Cortex API using any type of Cortex Predictor. In this guide we'll show the general outline of a multi-model deployment. The section for each predictor type is based on a corresponding example that can be found in the [examples directory](https://github.com/cortexlabs/cortex/tree/master/examples) of the Cortex project. 
+It is possible to serve multiple models in the same Cortex API using any type of Cortex Predictor. In this guide we'll show the general outline of a multi-model deployment. ## Python Predictor diff --git a/docs/guides/private-docker.md b/docs/guides/private-docker.md index d22fda829f..d93a9e46ba 100644 --- a/docs/guides/private-docker.md +++ b/docs/guides/private-docker.md @@ -12,7 +12,7 @@ When running Cortex locally, you can use private Docker images by running `docke ### Step 1 -Install and configure kubectl ([instructions](kubectl-setup.md)). +Install and configure kubectl ([instructions](kubectl.md)). ### Step 2 diff --git a/docs/guides/self-hosted-images.md b/docs/guides/self-hosted-images.md index 4f7fbcd547..6c51fce7a5 100644 --- a/docs/guides/self-hosted-images.md +++ b/docs/guides/self-hosted-images.md @@ -129,9 +129,9 @@ done echo "-----------------------------------------------" ``` -The first list of images that were printed (the cluster images) can be directly copy-pasted in your [cluster configuration file](../aws/install.md) before spinning up your cluster. +The first list of images that were printed (the cluster images) can be directly copy-pasted in your cluster configuration file before spinning up your cluster. -The second list of images that were printed (the API images) can be used in your [API configuration files](../workloads/realtime/api-configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the public images offered by Cortex are used for your predictors, so you will need to specify your ECR image paths for all of your APIs. +The second list of images that were printed (the API images) can be used in your API configuration files. The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the public images offered by Cortex are used for your predictors, so you will need to specify your ECR image paths for all of your APIs. 
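For example, a hypothetical Realtime API configuration pointing at a self-hosted predictor image might look like the following (the API name, predictor path, account ID, region, and version are all placeholders):

```bash
# sketch of an API configuration that uses an ECR-hosted predictor image
cat > cortex.yaml <<EOF
- name: my-api
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor.py
    image: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/python-predictor-cpu:<cortex_version>
EOF
```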
## Step 5 diff --git a/docs/summary.md b/docs/summary.md index 8a046bf8f3..3636e4a50b 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -45,7 +45,7 @@ ## Workloads -* [Realtime API](workloads/realtime.md) +* Realtime * [Predictor implementation](workloads/realtime/predictors.md) * [API configuration](workloads/realtime/configuration.md) * [API statuses](workloads/realtime/statuses.md) @@ -53,17 +53,13 @@ * [Parallelism](workloads/realtime/parallelism.md) * [Autoscaling](workloads/realtime/autoscaling.md) * [Traffic Splitter](workloads/realtime/traffic-splitter.md) -* [Batch API](workloads/batch.md) +* Batch * [Predictor implementation](workloads/batch/predictors.md) * [API configuration](workloads/batch/configuration.md) * [Endpoints](workloads/batch/endpoints.md) * [Job statuses](workloads/batch/statuses.md) -* [CLI reference](workloads/cli.md) -* [Python client](workloads/python-client.md) * [Python packages](workloads/python-packages.md) * [System packages](workloads/system-packages.md) -* [Environments](workloads/environments.md) -* [Telemetry](workloads/telemetry.md) ## Troubleshooting @@ -75,7 +71,6 @@ ## Guides -* [Installing the CLI](guides/cli.md) * [Exporting models](guides/exporting.md) * [Multi-model endpoints](guides/multi-model.md) * [View API metrics](guides/metrics.md) diff --git a/docs/troubleshooting/api-request-errors.md b/docs/troubleshooting/api-request-errors.md index 5b575bac78..9b61e6a673 100644 --- a/docs/troubleshooting/api-request-errors.md +++ b/docs/troubleshooting/api-request-errors.md @@ -8,4 +8,4 @@ When making prediction requests to your API, it's possible to get a `{"message": 1. Your API may have errored during initialization or while responding to a previous request. `cortex get API_NAME` will show the status of your API, and you can view the logs with `cortex logs API_NAME`. 1. If `cortex get API_NAME` shows your API's status as "updating" for a while and if `cortex logs API_NAME` doesn't shed any light onto what may be wrong, please see the [API is stuck updating](stuck-updating.md) troubleshooting guide. -It is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) if you are using an API Gateway endpoint for your API and if your request exceeds API Gateway's 29 second timeout. If you don't know whether you are using API Gateway, you can run `cortex get ` and check if `networking.api_gateway` is not set to `none` in the api configuration. If the request is exceeding the API Gateway timeout, your client should receive the `{"message":"Service Unavailable"}` response ~29 seconds after making the request. To confirm that this is the issue, you can modify your `predict()` function to immediately return a response (e.g. `return "ok"`), re-deploy your API, wait for the update to complete, and try making a request. If your client successfully receives the "ok" response, it is likely that the API Gateway timeout is occurring. You can either modify your `predict()` implementation to take less time, run on faster hardware (e.g. GPUs), or disable API Gateway for this API by setting `api_gateway: none` in the `networking` field of the [api configuration](api-configuration.md) (see [networking](../aws/networking.md) for more details). +It is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) if you are using an API Gateway endpoint for your API and if your request exceeds API Gateway's 29 second timeout. 
If you don't know whether you are using API Gateway, you can run `cortex get ` and check if `networking.api_gateway` is not set to `none` in the api configuration. If the request is exceeding the API Gateway timeout, your client should receive the `{"message":"Service Unavailable"}` response ~29 seconds after making the request. To confirm that this is the issue, you can modify your `predict()` function to immediately return a response (e.g. `return "ok"`), re-deploy your API, wait for the update to complete, and try making a request. If your client successfully receives the "ok" response, it is likely that the API Gateway timeout is occurring. You can either modify your `predict()` implementation to take less time, run on faster hardware (e.g. GPUs), or disable API Gateway for this API by setting `api_gateway: none` in the `networking` field of the API configuration. diff --git a/docs/troubleshooting/nvidia-container-runtime-not-found.md b/docs/troubleshooting/nvidia-container-runtime-not-found.md index cd13b7ef9b..42bcca54ce 100644 --- a/docs/troubleshooting/nvidia-container-runtime-not-found.md +++ b/docs/troubleshooting/nvidia-container-runtime-not-found.md @@ -6,9 +6,7 @@ When attempting to deploy a model to a GPU in the local environment, you may enc ## Check Compatibility -Please ensure that your local machine has an NVIDIA GPU card installed. If you don't have a local machine with an NVIDIA GPU, you can find instructions for spinning up a single GPU instance to try out model serving on a GPU with Cortex [here](../guides/single-node-deployment.md). - -Mac and Windows are currently not supported by the NVIDIA container runtime. You can find the complete list of supported operating system and architectures [here](https://nvidia.github.io/nvidia-container-runtime/). +Please ensure that your local machine has an NVIDIA GPU card installed. Mac and Windows are currently not supported by the NVIDIA container runtime. You can find the complete list of supported operating system and architectures [here](https://nvidia.github.io/nvidia-container-runtime). ## Install NVIDIA container runtime diff --git a/docs/workloads/batch/endpoints.md b/docs/workloads/batch/endpoints.md index 1e52c1b5e0..f7023f286e 100644 --- a/docs/workloads/batch/endpoints.md +++ b/docs/workloads/batch/endpoints.md @@ -2,8 +2,6 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md), you've implemented a [Predictor](predictors.md), you've [configured your API](api-configuration.md), and you've [deployed an api](deployment.md), you can submit and manage jobs by making HTTP requests to your Batch API endpoint. - A deployed Batch API endpoint supports the following: 1. Submitting a batch job diff --git a/docs/workloads/batch/predictors.md b/docs/workloads/batch/predictors.md index ada4321a20..e5d1bb0cca 100644 --- a/docs/workloads/batch/predictors.md +++ b/docs/workloads/batch/predictors.md @@ -92,7 +92,7 @@ class PythonPredictor: pass ``` -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. 
+For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. ### Pre-installed packages @@ -226,7 +226,7 @@ Cortex provides a `tensorflow_client` to your Predictor's constructor. `tensorfl When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). See the [multi model guide](../../guides/multi-model.md#tensorflow-predictor) for more information. -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. +For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. ### Pre-installed packages @@ -309,7 +309,7 @@ Cortex provides an `onnx_client` to your Predictor's constructor. `onnx_client` When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). See the [multi model guide](../../guides/multi-model.md#onnx-predictor) for more information. -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. +For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. 
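As a sketch of what that override can look like when submitting a job — the endpoint and the fields other than `config` are illustrative, so check the Batch API endpoints documentation for the exact request schema:

```bash
# submit a batch job, overriding the predictor's `config` for this job only
curl <batch_api_endpoint> \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{
        "workers": 1,
        "item_list": {"items": [1, 2, 3, 4], "batch_size": 2},
        "config": {"threshold": 0.9}
      }'
```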
### Pre-installed packages diff --git a/docs/workloads/python-packages.md b/docs/workloads/python-packages.md index a960dde070..b34881f503 100644 --- a/docs/workloads/python-packages.md +++ b/docs/workloads/python-packages.md @@ -16,7 +16,7 @@ You can install your required PyPI packages and import them in your Python files If you want to use `conda` to install your python packages, see the [Conda section](#conda-packages) below. -Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the [Realtime API Predictor documentation](realtime-api/predictors.md) and [Batch API Predictor documentation](batch-api/predictors.md)). +Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the Realtime API Predictor documentation and Batch API Predictor documentation). ## Private PyPI packages diff --git a/docs/workloads/realtime/predictors.md b/docs/workloads/realtime/predictors.md index b9aa1fdfa8..b5b23921a9 100644 --- a/docs/workloads/realtime/predictors.md +++ b/docs/workloads/realtime/predictors.md @@ -128,7 +128,7 @@ When explicit model paths are specified in the Python predictor's API configurat When multiple models are defined using the Predictor's `models` field, the `python_client.get_model()` method expects an argument `model_name` which must hold the name of the model that you want to load (for example: `self.client.get_model("text-generator")`). There is also an optional second argument to specify the model version. See [models](models.md) and the [multi model guide](../../guides/multi-model.md#python-predictor) for more information. -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. +For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your API configuration, and it is passed through to your Predictor's constructor. Your API can accept requests with different types of payloads such as `JSON`-parseable, `bytes` or `starlette.datastructures.FormData` data. Navigate to the [API requests](#api-requests) section to learn about how headers can be used to change the type of `payload` that is passed into your `predict` method. @@ -271,7 +271,7 @@ Cortex provides a `tensorflow_client` to your Predictor's constructor. `tensorfl When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). There is also an optional third argument to specify the model version. See [models](models.md) and the [multi model guide](../../guides/multi-model.md#tensorflow-predictor) for more information. -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. 
You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. +For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. Your API can accept requests with different types of payloads such as `JSON`-parseable, `bytes` or `starlette.datastructures.FormData` data. Navigate to the [API requests](#api-requests) section to learn about how headers can be used to change the type of `payload` that is passed into your `predict` method. @@ -363,7 +363,7 @@ Cortex provides an `onnx_client` to your Predictor's constructor. `onnx_client` When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). There is also an optional third argument to specify the model version. See [models](models.md) and the [multi model guide](../../guides/multi-model.md#onnx-predictor) for more information. -For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. +For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. Your API can accept requests with different types of payloads such as `JSON`-parseable, `bytes` or `starlette.datastructures.FormData` data. Navigate to the [API requests](#api-requests) section to learn about how headers can be used to change the type of `payload` that is passed into your `predict` method. diff --git a/docs/workloads/realtime/traffic-splitter.md b/docs/workloads/realtime/traffic-splitter.md index adfee17215..03c42c8e66 100644 --- a/docs/workloads/realtime/traffic-splitter.md +++ b/docs/workloads/realtime/traffic-splitter.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t The Traffic Splitter feature allows you to split traffic between multiple Realtime APIs on your Cortex Cluster. This can be useful for A/B testing models in production. -After [deploying Realtime APIs](deployment.md), you can deploy an Traffic Splitter to provide a single endpoint that can route a request randomly to one of the target Realtime APIs. Weights can be assigned to Realtime APIs to control the percentage of requests routed to each API. +You can deploy an Traffic Splitter to provide a single endpoint that can route a request randomly to one of the target Realtime APIs. Weights can be assigned to Realtime APIs to control the percentage of requests routed to each API. 
**Traffic Splitters are only supported on a Cortex cluster (in AWS).** From e6589e2fbc539659f4e8cc4f9db30c541525e2ad Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 15 Dec 2020 15:25:35 -0800 Subject: [PATCH 03/11] Update docs --- docs/cli/environments.md | 72 ++++++++++++++++++++++++++++++++++++++++ docs/cli/install.md | 21 ------------ 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/docs/cli/environments.md b/docs/cli/environments.md index e69de29bb2..f546e376fb 100644 --- a/docs/cli/environments.md +++ b/docs/cli/environments.md @@ -0,0 +1,72 @@ +# Environments + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +By default, the CLI has a single environment named `local`. When you create a cluster with `cortex cluster up`, an environment named `aws` or `gcp` is automatically created to point to your cluster. You can name the environment something else via the `--configure-env` flag, e.g. `cortex cluster up --configure-env prod`. You can also use the `--configure-env` flag with `cortex cluster info` and `cortex cluster configure` to create / update the specified environment. + +## Example: `local` and `aws` + +```bash +cortex deploy # uses local env; same as `cortex deploy --env local` +cortex logs my-api # uses local env; same as `cortex logs my-api --env local` +cortex delete my-api # uses local env; same as `cortex delete my-api --env local` + +cortex cluster up # configures the aws env; same as `cortex cluster up --configure-env aws` +cortex deploy --env aws +cortex deploy # uses local env; same as `cortex deploy --env local` + +# optional: change the default environment to aws +cortex env default aws # sets aws as the default env +cortex deploy # uses aws env; same as `cortex deploy --env aws` +cortex deploy --env local +``` + +## Example: multiple clusters + +```bash +cortex cluster up --config cluster1.yaml --configure-env cluster1 # configures the cluster1 env +cortex cluster up --config cluster2.yaml --configure-env cluster2 # configures the cluster2 env + +cortex deploy --env cluster1 +cortex logs my-api --env cluster1 +cortex delete my-api --env cluster1 + +cortex deploy --env cluster2 +cortex logs my-api --env cluster2 +cortex delete my-api --env cluster2 +``` + +## Example: multiple clusters, if you omitted the `--configure-env` on `cortex cluster up` + +```bash +cortex cluster info --config cluster1.yaml --configure-env cluster1 # configures the cluster1 env +cortex cluster info --config cluster2.yaml --configure-env cluster2 # configures the cluster2 env + +cortex deploy --env cluster1 +cortex logs my-api --env cluster1 +cortex delete my-api --env cluster1 + +cortex deploy --env cluster2 +cortex logs my-api --env cluster2 +cortex delete my-api --env cluster2 +``` + +## Example: configure `cortex` CLI to connect to an existing cluster + +If you are installing the `cortex` CLI on a new computer, you can configure it to access an existing Cortex cluster. + +On the computer which already has the CLI configured, run: + +```bash +cortex env list +``` + +Take note of the environment name and operator endpoint of the desired environment. + +On your new machine, run: + +```bash +cortex env configure +``` + +This will prompt for the necessary configuration. Note that the AWS credentials that you use here do not need any IAM permissions attached. 
If you will be running any `cortex cluster` commands specify the preferred AWS credentials using cli flags `--aws-key AWS_ACCESS_KEY_ID --aws-secret AWS_SECRET_ACCESS_KEY`. diff --git a/docs/cli/install.md b/docs/cli/install.md index d72c54e2cc..d9f1da18c3 100644 --- a/docs/cli/install.md +++ b/docs/cli/install.md @@ -21,24 +21,3 @@ $ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/v0.18. By default, the Cortex CLI is installed at `/usr/local/bin/cortex`. To install the executable elsewhere, export the `CORTEX_INSTALL_PATH` environment variable to your desired location before running the command above. By default, the Cortex CLI creates a directory at `~/.cortex/` and uses it to store environment configuration. To use a different directory, export the `CORTEX_CLI_CONFIG_DIR` environment variable before running a `cortex` command. - -## Environments - -By default, the CLI has a single environment named `local`. When you create a cluster with `cortex cluster up`, an environment named `aws` or `gcp` is automatically created to point to your cluster. You can name the environment something else via the `--configure-env` flag, e.g. `cortex cluster up --configure-env prod`. You can also use the `--configure-env` flag with `cortex cluster info` and `cortex cluster configure` to create / update the specified environment. - -### Example - -```bash -cortex deploy # uses local env; same as `cortex deploy --env local` -cortex logs my-api # uses local env; same as `cortex logs my-api --env local` -cortex delete my-api # uses local env; same as `cortex delete my-api --env local` - -cortex cluster up # configures the aws env; same as `cortex cluster up --configure-env aws` -cortex deploy --env aws -cortex deploy # uses local env; same as `cortex deploy --env local` - -# optional: change the default environment to aws -cortex env default aws # sets aws as the default env -cortex deploy # uses aws env; same as `cortex deploy --env aws` -cortex deploy --env local -``` From 32c4ae2d80f8ffe74123f9b92315e8c416e6a695 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 15 Dec 2020 18:22:38 -0800 Subject: [PATCH 04/11] Update docs --- docs/aws/uninstall.md | 2 +- docs/contributing.md | 4 +- docs/guides/multi-model.md | 310 ------------------ docs/summary.md | 37 +-- .../batch.md => workloads/batch/deploy.md} | 4 +- docs/workloads/batch/predictors.md | 12 +- .../dependencies/deploy.md} | 4 +- .../{ => dependencies}/python-packages.md | 2 +- .../{ => dependencies}/system-packages.md | 2 +- docs/{guides => workloads}/exporting.md | 0 .../multi-model/deploy.md} | 2 +- docs/workloads/multi-model/multi-model.md | 156 +++++++++ docs/workloads/realtime/autoscaling.md | 2 +- .../realtime/deploy.md} | 6 +- docs/workloads/realtime/predictors.md | 14 +- docs/workloads/realtime/traffic-splitter.md | 75 ----- .../traffic-splitter/configuration.md | 14 + .../traffic-splitter/deploy.md} | 8 +- pkg/workloads/cortex/client/setup.py | 4 +- 19 files changed, 214 insertions(+), 444 deletions(-) delete mode 100644 docs/guides/multi-model.md rename docs/{tutorials/batch.md => workloads/batch/deploy.md} (96%) rename docs/{tutorials/project.md => workloads/dependencies/deploy.md} (87%) rename docs/workloads/{ => dependencies}/python-packages.md (99%) rename docs/workloads/{ => dependencies}/system-packages.md (97%) rename docs/{guides => workloads}/exporting.md (100%) rename docs/{tutorials/multi-model.md => workloads/multi-model/deploy.md} (97%) create mode 100644 docs/workloads/multi-model/multi-model.md 
rename docs/{tutorials/realtime.md => workloads/realtime/deploy.md} (89%) delete mode 100644 docs/workloads/realtime/traffic-splitter.md create mode 100644 docs/workloads/traffic-splitter/configuration.md rename docs/{tutorials/traffic-splitter.md => workloads/traffic-splitter/deploy.md} (80%) diff --git a/docs/aws/uninstall.md b/docs/aws/uninstall.md index 5a094e2de0..5d9ec3e217 100644 --- a/docs/aws/uninstall.md +++ b/docs/aws/uninstall.md @@ -43,6 +43,6 @@ On rare occasions, `cortex cluster down` may not be able to spin down your Corte 1. Select the final stack (the one that ends in "-cluster") and click "Delete". - If deleting the stack fails, navigate to the EC2 dashboard in the AWS console, delete the load balancers that are associated with the cluster, and try again (you can determine which load balancers are associated with the cluster by setting the correct region in the console and checking the `cortex.dev/cluster-name` tag on all load balancers). If the problem still persists, delete any other AWS resources that are blocking the stack deletion and try again. Feel free to reach out to us on [gitter](https://gitter.im/cortexlabs/cortex) if you still aren't able to delete the stacks. + If deleting the stack fails, navigate to the EC2 dashboard in the AWS console, delete the load balancers that are associated with the cluster, and try again (you can determine which load balancers are associated with the cluster by setting the correct region in the console and checking the `cortex.dev/cluster-name` tag on all load balancers). If the problem still persists, delete any other AWS resources that are blocking the stack deletion and try again. 1. In rare cases, you may need to delete other AWS resources associated with your Cortex cluster. For each the following resources, go to the appropriate AWS Dashboard (in the region that your cluster was in), and confirm that there are no resources left behind by the cluster: API Gateway API, API Gateway VPC Link, CloudWatch Dashboard, SQS Queues, S3 Bucket, and CloudWatch LogGroups (the Cortex bucket and log groups are not deleted by `cluster down` in order to preserve your data). diff --git a/docs/contributing.md b/docs/contributing.md index ebdb48d443..7e55824d5d 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -2,7 +2,7 @@ ## Remote development -We recommend that you run your development environment on a cloud instance due to frequent docker registry pushing, e.g. an AWS EC2 instance or GCP VM. We've had a good experience using [Mutagen](https://mutagen.io/documentation/introduction) to synchronize local / remote file systems. Feel free to reach out to us on [gitter](https://gitter.im/cortexlabs/cortex) if you have any questions about this. +We recommend that you run your development environment on a cloud instance due to frequent docker registry pushing, e.g. an AWS EC2 instance or GCP VM. We've had a good experience using [Mutagen](https://mutagen.io/documentation/introduction) to synchronize local / remote file systems. ## Prerequisites @@ -248,5 +248,3 @@ If you are only modifying the operator, `make operator-local-aws` will build and If you are modifying code in the API images (i.e. any of the Python serving code), `make images-dev-aws` may build more images than you need during testing. For example, if you are only testing using the `python-predictor-cpu` image, you can run `./dev/registry.sh update-single python-predictor-cpu --provider aws` (or use `--provider local` if testing locally). 
See `Makefile` for additional dev commands. - -Feel free to [chat with us](https://gitter.im/cortexlabs/cortex) if you have any questions. diff --git a/docs/guides/multi-model.md b/docs/guides/multi-model.md deleted file mode 100644 index 8ebc6f5c63..0000000000 --- a/docs/guides/multi-model.md +++ /dev/null @@ -1,310 +0,0 @@ -# Multi-model endpoints - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -It is possible to serve multiple models in the same Cortex API using any type of Cortex Predictor. In this guide we'll show the general outline of a multi-model deployment. - -## Python Predictor - -### Specifying models in API config - -#### `cortex.yaml` - -Even though it looks as if there's only a single model served, there are actually 4 different versions saved in `s3://cortex-examples/sklearn/mpg-estimator/linreg/`. - -```yaml -- name: mpg-estimator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ -``` - -#### `predictor.py` - -```python -import mlflow.sklearn -import numpy as np - - -class PythonPredictor: - def __init__(self, config, python_client): - self.client = python_client - - def load_model(self, model_path): - return mlflow.sklearn.load_model(model_path) - - def predict(self, payload, query_params): - model_version = query_params.get("version") - - # process the input - # ... - - model = self.client.get_model(model_version=model_version) - result = model.predict(model_input) - - return {"prediction": result, "model": {"version": model_version}} -``` - -#### Making predictions - -For convenience, we'll export our API's endpoint (yours will be different from mine): - -```bash -$ api_endpoint=http://a36473270de8b46e79a769850dd3372d-c67035afa37ef878.elb.us-west-2.amazonaws.com/mpg-estimator -``` - -Next, we'll make a prediction using the sentiment analyzer model by specifying the model version as a query parameter: - -```bash -$ curl "${api_endpoint}?version=1" -X POST -H "Content-Type: application/json" -d @sample.json - -{"prediction": 26.929889872154185, "model": {"version": "1"}} -``` - -Then we'll make a prediction using the 2nd version of the model (since they are just duplicate models, it will only return the same result): - -```bash -$ curl "${api_endpoint}?version=2" -X POST -H "Content-Type: application/json" -d @sample.json - -{"prediction": 26.929889872154185, "model": {"version": "2"}} -``` - -### Without specifying models in API config - -For the Python Predictor, the API configuration for a multi-model API is similar to single-model APIs. The Predictor's `config` field can be used to customize the behavior of the `predictor.py` implementation. - -#### `cortex.yaml` - -```yaml -- name: multi-model-text-analyzer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: {...} - ... -``` - -#### `predictor.py` - -Models should be loaded within the predictor's constructor. Query parameters are encouraged to be used when selecting the model for inference. - -```python -# import modules here - -class PythonPredictor: - def __init__(self, config): - # prepare the environment, download/load models/labels, etc - # ... - - # load models - self.analyzer = initialize_model("sentiment-analysis") - self.summarizer = initialize_model("summarization") - - def predict(self, query_params, payload): - # preprocessing - model_name = query_params.get("model") - model_input = payload["text"] - # ... 
- - # make prediction - if model_name == "sentiment": - results = self.analyzer(model_input) - predicted_label = postprocess(results) - return {"label": predicted_label} - elif model_name == "summarizer": - results = self.summarizer(model_input) - predicted_label = postprocess(results) - return {"label": predicted_label} - else: - return JSONResponse({"error": f"unknown model: {model_name}"}, status_code=400) -``` - -#### Making predictions - -For convenience, we'll export our API's endpoint (yours will be different from mine): - -```bash -$ api_endpoint=http://a36473270de8b46e79a769850dd3372d-c67035afa37ef878.elb.us-west-2.amazonaws.com/multi-model-text-analyzer -``` - -Next, we'll make a prediction using the sentiment analyzer model by specifying the model name as a query parameter: - -```bash -$ curl "${api_endpoint}?model=sentiment" -X POST -H "Content-Type: application/json" -d @sample-sentiment.json - -{"label": "POSITIVE", "score": 0.9998506903648376} -``` - -Then we'll make a prediction using the text summarizer model: - -```bash -$ curl "${api_endpoint}?model=summarizer" -X POST -H "Content-Type: application/json" -d @sample-summarizer.json - -Machine learning is the study of algorithms and statistical models that computer systems use to perform a specific task. It is seen as a subset of artificial intelligence. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision. In its application across business problems, machine learning is also referred to as predictive analytics. -``` - -## TensorFlow Predictor - -For the TensorFlow Predictor, a multi-model API is configured by placing the list of models in the Predictor's `models` field (each model will specify its own unique name). The `predict()` method of the `tensorflow_client` object expects a second argument that represents the name of the model that will be used for inference. - -### `cortex.yaml` - -```yaml -- name: multi-model-classifier - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - models: - paths: - - name: inception - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - - name: iris - model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ - - name: resnet50 - model_path: s3://cortex-examples/tensorflow/resnet50/ - ... -``` - -### `predictor.py` - -```python -# import modules here - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - # prepare the environment, download/load labels, etc - # ... 
- - self.client = tensorflow_client - - def predict(self, payload, query_params): - # preprocessing - model_name = query_params["model"] - model_input = preprocess(payload["url"]) - - # make prediction - results = self.client.predict(model_input, model_name) - - # postprocess - predicted_label = postprocess(results) - - return {"label": predicted_label} -``` - -### Making predictions - -For convenience, we'll export our API's endpoint (yours will be different from mine): - -```bash -$ api_endpoint=http://a36473270de8b46e79a769850dd3372d-c67035afa37ef878.elb.us-west-2.amazonaws.com/multi-model-classifier -``` - -Next, we'll make a prediction using the iris classifier model by specifying the model name as a query parameter: - -```bash -$ curl "${ENDPOINT}?model=iris" -X POST -H "Content-Type: application/json" -d @sample-iris.json - -{"label": "setosa"} -``` - -Then we'll make a prediction using the resnet50 model: - -```bash -$ curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample-image.json - -{"label": "sports_car"} -``` - -Finally we'll make a prediction using the inception model: - -```bash -$ curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" -d @sample-image.json - -{"label": "sports_car"} -``` - -## ONNX Predictor - -For the ONNX Predictor, a multi-model API is configured by placing the list of models in the Predictor's `models` field (each model will specify its own unique name). The `predict()` method of the `onnx_client` object expects a second argument that represents the name of the model that will be used for inference. - -### `cortex.yaml` - -```yaml -- name: multi-model-classifier - kind: RealtimeAPI - predictor: - type: onnx - path: predictor.py - models: - paths: - - name: resnet50 - model_path: s3://cortex-examples/onnx/resnet50/ - - name: mobilenet - model_path: s3://cortex-examples/onnx/mobilenet/ - - name: shufflenet - model_path: s3://cortex-examples/onnx/shufflenet/ - ... -``` - -### `predictor.py` - -```python -# import modules here - -class ONNXPredictor: - def __init__(self, onnx_client, config): - # prepare the environment, download/load labels, etc - # ... 
- - self.client = onnx_client - - def predict(self, payload, query_params): - # process the input - model_name = query_params["model"] - model_input = preprocess(payload["url"]) - - # make prediction - results = self.client.predict(model_input, model_name) - - # postprocess - predicted_label = postprocess(results) - - return {"label": predicted_label} - -``` - -### Making predictions - -For convenience, we'll export our API's endpoint (yours will be different from mine): - -```bash -$ api_endpoint=http://a36473270de8b46e79a769850dd3372d-c67035afa37ef878.elb.us-west-2.amazonaws.com/multi-model-classifier -``` - -Next, we'll make a prediction using the resnet50 model by specifying the model name as a query parameter: - -```bash -$ curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample.json - -{"label": "tabby"} -``` - -Then we'll make a prediction using the mobilenet model: - -```bash -$ curl "${ENDPOINT}?model=mobilenet" -X POST -H "Content-Type: application/json" -d @sample.json - -{"label": "tabby"} -``` - -Finally we'll make a prediction using the shufflenet model: - -```bash -$ curl "${ENDPOINT}?model=shufflenet" -X POST -H "Content-Type: application/json" -d @sample.json - -{"label": "Egyptian_cat"} -``` diff --git a/docs/summary.md b/docs/summary.md index 3636e4a50b..4dbb90e388 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,6 +1,6 @@ # Table of contents -* [Get started](tutorials/realtime.md) +* [Get started](workloads/realtime/deploy.md) * [Community](https://gitter.im/cortexlabs/cortex) * [Contributing](contributing.md) @@ -13,14 +13,6 @@ * [Telemetry](cli/telemetry.md) * [Uninstall](cli/uninstall.md) -## Tutorials - -* [Realtime API](tutorials/realtime.md) -* [Batch API](tutorials/batch.md) -* [Multi-model API](tutorials/multi-model.md) -* [Traffic splitter](tutorials/traffic-splitter.md) -* [Project directory](tutorials/project.md) - ## Running on AWS * [Install](aws/install.md) @@ -46,20 +38,27 @@ ## Workloads * Realtime - * [Predictor implementation](workloads/realtime/predictors.md) - * [API configuration](workloads/realtime/configuration.md) - * [API statuses](workloads/realtime/statuses.md) + * [Deploy](workloads/realtime/deploy.md) + * [Predictor](workloads/realtime/predictors.md) + * [Configuration](workloads/realtime/configuration.md) + * [Statuses](workloads/realtime/statuses.md) * [Models](workloads/realtime/models.md) * [Parallelism](workloads/realtime/parallelism.md) * [Autoscaling](workloads/realtime/autoscaling.md) - * [Traffic Splitter](workloads/realtime/traffic-splitter.md) * Batch - * [Predictor implementation](workloads/batch/predictors.md) - * [API configuration](workloads/batch/configuration.md) + * [Deploy](workloads/batch/deploy.md) + * [Predictor](workloads/batch/predictors.md) + * [Configuration](workloads/batch/configuration.md) + * [Statuses](workloads/batch/statuses.md) * [Endpoints](workloads/batch/endpoints.md) - * [Job statuses](workloads/batch/statuses.md) -* [Python packages](workloads/python-packages.md) -* [System packages](workloads/system-packages.md) +* Traffic Splitter + * [Deploy](workloads/traffic-splitter/deploy.md) + * [Python packages](workloads/traffic-splitter/configuration.md) +* Dependencies + * [Deploy](workloads/dependencies/deploy.md) + * [Python packages](workloads/dependencies/python-packages.md) + * [System packages](workloads/dependencies/system-packages.md) +* [Exporting models](workloads/exporting.md) ## Troubleshooting @@ -71,8 +70,6 @@ ## Guides -* [Exporting 
models](guides/exporting.md) -* [Multi-model endpoints](guides/multi-model.md) * [View API metrics](guides/metrics.md) * [Setting up kubectl](guides/kubectl.md) * [Self-hosted Docker images](guides/self-hosted-images.md) diff --git a/docs/tutorials/batch.md b/docs/workloads/batch/deploy.md similarity index 96% rename from docs/tutorials/batch.md rename to docs/workloads/batch/deploy.md index b10691f806..104e7040e5 100644 --- a/docs/tutorials/batch.md +++ b/docs/workloads/batch/deploy.md @@ -1,6 +1,6 @@ -# Deploy a batch API +# BatchAPI -Deploy models as batch APIs that can orchestrate distributed batch inference jobs on large datasets. +Deploy batch APIs that can orchestrate distributed batch inference jobs on large datasets. ## Key features diff --git a/docs/workloads/batch/predictors.md b/docs/workloads/batch/predictors.md index e5d1bb0cca..cba8e74f96 100644 --- a/docs/workloads/batch/predictors.md +++ b/docs/workloads/batch/predictors.md @@ -2,8 +2,6 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md), you can implement one of Cortex's Predictor classes to deploy your model. A Predictor is a Python class that describes how to initialize your model and use it to make predictions. - Which Predictor you use depends on how your model is exported: * [TensorFlow Predictor](#tensorflow-predictor) if your model is exported as a TensorFlow `SavedModel` @@ -167,8 +165,6 @@ torchvision==0.6.1 The pre-installed system packages are listed in [images/python-predictor-cpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-cpu/Dockerfile) (for CPU), [images/python-predictor-gpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-gpu/Dockerfile) (for GPU), or [images/python-predictor-inf/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-inf/Dockerfile) (for Inferentia). -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). - ## TensorFlow Predictor ### Interface @@ -224,7 +220,7 @@ class TensorFlowPredictor: Cortex provides a `tensorflow_client` to your Predictor's constructor. `tensorflow_client` is an instance of [TensorFlowClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/lib/client/tensorflow.py) that manages a connection to a TensorFlow Serving container to make predictions using your model. It should be saved as an instance variable in your Predictor, and your `predict()` function should call `tensorflow_client.predict()` to make an inference with your exported TensorFlow model. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well. -When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). See the [multi model guide](../../guides/multi-model.md#tensorflow-predictor) for more information. 
+When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. @@ -250,8 +246,6 @@ tensorflow==2.3.0 The pre-installed system packages are listed in [images/tensorflow-predictor/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/tensorflow-predictor/Dockerfile). -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). - ## ONNX Predictor ### Interface @@ -307,7 +301,7 @@ class ONNXPredictor: Cortex provides an `onnx_client` to your Predictor's constructor. `onnx_client` is an instance of [ONNXClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/lib/client/onnx.py) that manages an ONNX Runtime session to make predictions using your model. It should be saved as an instance variable in your Predictor, and your `predict()` function should call `onnx_client.predict()` to make an inference with your exported ONNX model. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well. -When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). See the [multi model guide](../../guides/multi-model.md#onnx-predictor) for more information. +When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. @@ -329,5 +323,3 @@ requests==2.24.0 The pre-installed system packages are listed in [images/onnx-predictor-cpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/onnx-predictor-cpu/Dockerfile) (for CPU) or [images/onnx-predictor-gpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/onnx-predictor-gpu/Dockerfile) (for GPU). - -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). 
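As a hedged illustration of the multi-model behavior described above, the following sketch shows a batch `TensorFlowPredictor` that picks one of the models defined in its `models` field by name; the model names `iris` and `resnet50`, the `model_name` config key, and the abbreviated method signatures are assumptions for this example, not fixed by Cortex.

```python
# Hedged sketch: a batch TensorFlowPredictor that chooses between the models defined
# in its `models` field. The model names ("iris", "resnet50") and the "model_name"
# config key are assumptions for this example.
class TensorFlowPredictor:
    def __init__(self, tensorflow_client, config):
        self.client = tensorflow_client
        # selecting the model via `config` means the choice can be overridden per job
        # through the `config` field of the job submission request
        self.model_name = config.get("model_name", "iris")

    def predict(self, payload, batch_id):
        # batch_id is unused in this sketch; the second argument to predict()
        # names which of the configured models to use for inference
        return self.client.predict(payload, self.model_name)
```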
diff --git a/docs/tutorials/project.md b/docs/workloads/dependencies/deploy.md similarity index 87% rename from docs/tutorials/project.md rename to docs/workloads/dependencies/deploy.md index dc512bfb49..4be723c8bd 100644 --- a/docs/tutorials/project.md +++ b/docs/workloads/dependencies/deploy.md @@ -1,5 +1,7 @@ # Deploy a project +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + You can deploy an API by providing a project directory. Cortex will save the project directory and make it available during API initialization. ```bash @@ -57,5 +59,5 @@ cx.create_api(api_spec, project_dir=".") ``` ```bash -$ cortex deploy api.yaml -e aws +$ cortex deploy api.yaml --env aws ``` diff --git a/docs/workloads/python-packages.md b/docs/workloads/dependencies/python-packages.md similarity index 99% rename from docs/workloads/python-packages.md rename to docs/workloads/dependencies/python-packages.md index b34881f503..c49274c91f 100644 --- a/docs/workloads/python-packages.md +++ b/docs/workloads/dependencies/python-packages.md @@ -1,4 +1,4 @@ -# Python/Conda packages +# Python / Conda packages _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ diff --git a/docs/workloads/system-packages.md b/docs/workloads/dependencies/system-packages.md similarity index 97% rename from docs/workloads/system-packages.md rename to docs/workloads/dependencies/system-packages.md index 6879c5a033..1f8eaddb9c 100644 --- a/docs/workloads/system-packages.md +++ b/docs/workloads/dependencies/system-packages.md @@ -92,7 +92,7 @@ docker build . -t org/my-api:latest _If you are only running Cortex locally, you can skip this section_ -You can push your built Docker image to a public registry of your choice (e.g. Docker Hub), or to a private registry on ECR or Docker Hub (for private Docker Hub, also follow [this guide](../guides/private-docker.md) to configure access in your cluster). +You can push your built Docker image to a public registry of your choice (e.g. Docker Hub), or to a private registry on ECR or Docker Hub (for private Docker Hub, also follow [this guide](../../guides/private-docker.md) to configure access in your cluster). For example, to use ECR, first create a repository to store your image: diff --git a/docs/guides/exporting.md b/docs/workloads/exporting.md similarity index 100% rename from docs/guides/exporting.md rename to docs/workloads/exporting.md diff --git a/docs/tutorials/multi-model.md b/docs/workloads/multi-model/deploy.md similarity index 97% rename from docs/tutorials/multi-model.md rename to docs/workloads/multi-model/deploy.md index 043ea6c6ad..2d2248183d 100644 --- a/docs/tutorials/multi-model.md +++ b/docs/workloads/multi-model/deploy.md @@ -1,4 +1,4 @@ -# Deploy a multi-model API +# Multi-model API Deploy several models in a single API to improve resource utilization efficiency. diff --git a/docs/workloads/multi-model/multi-model.md b/docs/workloads/multi-model/multi-model.md new file mode 100644 index 0000000000..c76ecb4202 --- /dev/null +++ b/docs/workloads/multi-model/multi-model.md @@ -0,0 +1,156 @@ +# Configuration + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +## `PythonPredictor` + +### Specifying models in API configuration + +#### `cortex.yaml` + +The directory `s3://cortex-examples/sklearn/mpg-estimator/linreg/` contains 4 different versions of the model. 
+ +```yaml +- name: mpg-estimator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ +``` + +#### `predictor.py` + +```python +import mlflow.sklearn +import numpy as np + + +class PythonPredictor: + def __init__(self, config, python_client): + self.client = python_client + + def load_model(self, model_path): + return mlflow.sklearn.load_model(model_path) + + def predict(self, payload, query_params): + model_version = query_params.get("version") + + # model_input = ... + + model = self.client.get_model(model_version=model_version) + result = model.predict(model_input) + + return {"prediction": result, "model": {"version": model_version}} +``` + +### Without specifying models in API configuration + +#### `cortex.yaml` + +```yaml +- name: text-analyzer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + ... +``` + +#### `predictor.py` + +```python +class PythonPredictor: + def __init__(self, config): + self.analyzer = initialize_model("sentiment-analysis") + self.summarizer = initialize_model("summarization") + + def predict(self, query_params, payload): + model_name = query_params.get("model") + model_input = payload["text"] + + # ... + + if model_name == "analyzer": + results = self.analyzer(model_input) + predicted_label = postprocess(results) + return {"label": predicted_label} + elif model_name == "summarizer": + results = self.summarizer(model_input) + predicted_label = postprocess(results) + return {"label": predicted_label} + else: + return JSONResponse({"error": f"unknown model: {model_name}"}, status_code=400) +``` + +## `TensorFlowPredictor` + +### `cortex.yaml` + +```yaml +- name: multi-model-classifier + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + models: + paths: + - name: inception + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + - name: iris + model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ + - name: resnet50 + model_path: s3://cortex-examples/tensorflow/resnet50/ + ... +``` + +### `predictor.py` + +```python +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + def predict(self, payload, query_params): + model_name = query_params["model"] + model_input = preprocess(payload["url"]) + results = self.client.predict(model_input, model_name) + predicted_label = postprocess(results) + return {"label": predicted_label} +``` + +## `ONNXPredictor` + +### `cortex.yaml` + +```yaml +- name: multi-model-classifier + kind: RealtimeAPI + predictor: + type: onnx + path: predictor.py + models: + paths: + - name: resnet50 + model_path: s3://cortex-examples/onnx/resnet50/ + - name: mobilenet + model_path: s3://cortex-examples/onnx/mobilenet/ + - name: shufflenet + model_path: s3://cortex-examples/onnx/shufflenet/ + ... 
+``` + +### `predictor.py` + +```python +class ONNXPredictor: + def __init__(self, onnx_client, config): + self.client = onnx_client + + def predict(self, payload, query_params): + model_name = query_params["model"] + model_input = preprocess(payload["url"]) + results = self.client.predict(model_input, model_name) + predicted_label = postprocess(results) + return {"label": predicted_label} +``` diff --git a/docs/workloads/realtime/autoscaling.md b/docs/workloads/realtime/autoscaling.md index 81907633cd..3ee8c07731 100644 --- a/docs/workloads/realtime/autoscaling.md +++ b/docs/workloads/realtime/autoscaling.md @@ -74,4 +74,4 @@ Assuming that `window` and `upscale_stabilization_period` are set to their defau Keep these delays in mind when considering overprovisioning (see above) and when determining appropriate values for `window` and `upscale_stabilization_period`. If you want the autoscaler to react as quickly as possible, set `upscale_stabilization_period` and `window` to their minimum values (0s and 10s respectively). -If it takes a long time to initialize your API replica (i.e. install dependencies and run your predictor's `__init__()` function), consider building your own API image to use instead of the default image. With this approach, you can pre-download/build/install any custom dependencies and bake them into the image. See [here](../system-packages.md#custom-docker-image) for documentation. +If it takes a long time to initialize your API replica (i.e. install dependencies and run your predictor's `__init__()` function), consider building your own API image to use instead of the default image. With this approach, you can pre-download/build/install any custom dependencies and bake them into the image. diff --git a/docs/tutorials/realtime.md b/docs/workloads/realtime/deploy.md similarity index 89% rename from docs/tutorials/realtime.md rename to docs/workloads/realtime/deploy.md index 5befb96bc8..709ac55061 100644 --- a/docs/tutorials/realtime.md +++ b/docs/workloads/realtime/deploy.md @@ -1,6 +1,8 @@ -# Deploy a realtime API +# RealtimeAPI -Deploy models as realtime APIs that can respond to prediction requests on demand. +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +Deploy realtime APIs that can respond to prediction requests on demand. ## Key features diff --git a/docs/workloads/realtime/predictors.md b/docs/workloads/realtime/predictors.md index b5b23921a9..3c8288f46d 100644 --- a/docs/workloads/realtime/predictors.md +++ b/docs/workloads/realtime/predictors.md @@ -2,8 +2,6 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md), you can implement one of Cortex's Predictor classes to deploy your model. A Predictor is a Python class that describes how to initialize your model and use it to make predictions. - Which Predictor you use depends on how your model is exported: * [TensorFlow Predictor](#tensorflow-predictor) if your model is exported as a TensorFlow `SavedModel` @@ -126,7 +124,7 @@ class PythonPredictor: When explicit model paths are specified in the Python predictor's API configuration, Cortex provides a `python_client` to your Predictor's constructor. 
`python_client` is an instance of [PythonClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/lib/client/python.py) that is used to load model(s) (it calls the `load_model()` method of your predictor, which must be defined when using explicit model paths). It should be saved as an instance variable in your Predictor, and your `predict()` function should call `python_client.get_model()` to load your model for inference. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well. -When multiple models are defined using the Predictor's `models` field, the `python_client.get_model()` method expects an argument `model_name` which must hold the name of the model that you want to load (for example: `self.client.get_model("text-generator")`). There is also an optional second argument to specify the model version. See [models](models.md) and the [multi model guide](../../guides/multi-model.md#python-predictor) for more information. +When multiple models are defined using the Predictor's `models` field, the `python_client.get_model()` method expects an argument `model_name` which must hold the name of the model that you want to load (for example: `self.client.get_model("text-generator")`). There is also an optional second argument to specify the model version. For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your API configuration, and it is passed through to your Predictor's constructor. @@ -207,8 +205,6 @@ torchvision==0.6.1 The pre-installed system packages are listed in [images/python-predictor-cpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-cpu/Dockerfile) (for CPU), [images/python-predictor-gpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-gpu/Dockerfile) (for GPU), or [images/python-predictor-inf/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/python-predictor-inf/Dockerfile) (for Inferentia). -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). - ## TensorFlow Predictor ### Interface @@ -269,7 +265,7 @@ class TensorFlowPredictor: Cortex provides a `tensorflow_client` to your Predictor's constructor. `tensorflow_client` is an instance of [TensorFlowClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/lib/client/tensorflow.py) that manages a connection to a TensorFlow Serving container to make predictions using your model. It should be saved as an instance variable in your Predictor, and your `predict()` function should call `tensorflow_client.predict()` to make an inference with your exported TensorFlow model. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well. -When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). There is also an optional third argument to specify the model version. 
See [models](models.md) and the [multi model guide](../../guides/multi-model.md#tensorflow-predictor) for more information. +When multiple models are defined using the Predictor's `models` field, the `tensorflow_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(payload, "text-generator")`). There is also an optional third argument to specify the model version. For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. @@ -299,8 +295,6 @@ tensorflow==2.3.0 The pre-installed system packages are listed in [images/tensorflow-predictor/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/tensorflow-predictor/Dockerfile). -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). - ## ONNX Predictor ### Interface @@ -361,7 +355,7 @@ class ONNXPredictor: Cortex provides an `onnx_client` to your Predictor's constructor. `onnx_client` is an instance of [ONNXClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/lib/client/onnx.py) that manages an ONNX Runtime session to make predictions using your model. It should be saved as an instance variable in your Predictor, and your `predict()` function should call `onnx_client.predict()` to make an inference with your exported ONNX model. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well. -When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). There is also an optional third argument to specify the model version. See [models](models.md) and the [multi model guide](../../guides/multi-model.md#onnx-predictor) for more information. +When multiple models are defined using the Predictor's `models` field, the `onnx_client.predict()` method expects a second argument `model_name` which must hold the name of the model that you want to use for inference (for example: `self.client.predict(model_input, "text-generator")`). There is also an optional third argument to specify the model version. For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as configurable model parameters or download links for initialization files. You define `config` in your [API configuration](configuration.md), and it is passed through to your Predictor's constructor. @@ -388,8 +382,6 @@ requests==2.24.0 The pre-installed system packages are listed in [images/onnx-predictor-cpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/onnx-predictor-cpu/Dockerfile) (for CPU) or [images/onnx-predictor-gpu/Dockerfile](https://github.com/cortexlabs/cortex/tree/master/images/onnx-predictor-gpu/Dockerfile) (for GPU). -If your application requires additional dependencies, you can install additional [Python packages](../python-packages.md) and [system packages](../system-packages.md). 
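The following hedged sketch ties the pieces above together for a realtime `PythonPredictor`: it defines `load_model()` and retrieves a named, versioned model via `python_client.get_model()`. The model name `text-generator`, the `version` query parameter, the `latest` default, and the `input` payload key are assumptions for illustration.

```python
# Hedged sketch: a realtime PythonPredictor loading a named, versioned model through
# python_client. The model name ("text-generator"), the "version" query parameter,
# and the "input" payload key are assumptions, not fixed by Cortex.
import mlflow.sklearn


class PythonPredictor:
    def __init__(self, config, python_client):
        self.client = python_client

    def load_model(self, model_path):
        # called by Cortex to load a model from its downloaded directory
        return mlflow.sklearn.load_model(model_path)

    def predict(self, payload, query_params):
        version = query_params.get("version", "latest")
        # first argument: the model name from the `models` field;
        # optional second argument: the model version (assumed to default to "latest")
        model = self.client.get_model("text-generator", version)
        result = model.predict(payload["input"])
        return {"prediction": result, "model": {"name": "text-generator", "version": version}}
```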
- ## API requests The type of the `payload` parameter in `predict(self, payload)` can vary based on the content type of the request. The `payload` parameter is parsed according to the `Content-Type` header in the request. Here are the parsing rules (see below for examples): diff --git a/docs/workloads/realtime/traffic-splitter.md b/docs/workloads/realtime/traffic-splitter.md deleted file mode 100644 index 03c42c8e66..0000000000 --- a/docs/workloads/realtime/traffic-splitter.md +++ /dev/null @@ -1,75 +0,0 @@ -# Traffic Splitter - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -The Traffic Splitter feature allows you to split traffic between multiple Realtime APIs on your Cortex Cluster. This can be useful for A/B testing models in production. - -You can deploy an Traffic Splitter to provide a single endpoint that can route a request randomly to one of the target Realtime APIs. Weights can be assigned to Realtime APIs to control the percentage of requests routed to each API. - -**Traffic Splitters are only supported on a Cortex cluster (in AWS).** - -## Traffic Splitter Configuration - -Traffic Splitter expects the target Realtime APIs to already be running or be included in the same configuration file as the Traffic Splitter. The traffic is routed according to the specified weights. The weights assigned to all Realtime APIs must to sum to 100. - -```yaml -- name: # Traffic Splitter name (required) - kind: TrafficSplitter # must be "TrafficSplitter", create an Traffic Splitter which routes traffic to multiple Realtime APIs - networking: - endpoint: # the endpoint for the Traffic Splitter (default: ) - api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the API will still be accessible via the load balancer) (default: public, unless disabled cluster-wide) - apis: # list of Realtime APIs to target - - name: # name of a Realtime API that is already running or is included in the same configuration file (required) - weight: # percentage of traffic to route to the Realtime API (all weights must sum to 100) (required) -``` - -## `cortex deploy` - -The `cortex deploy` command is used to deploy an Traffic Splitter. - -```bash -$ cortex deploy - -created traffic-splitter (TrafficSplitter) -``` - -Traffic Splitters are declarative, so to update your Traffic Splitter, you can modify the configuration and re-run `cortex deploy`. - -## `cortex get` - -The `cortex get` command displays the status of your Realtime APIs and Traffic Splitters, and `cortex get ` shows additional information about a specific Traffic Splitter. - -```bash -$ cortex get traffic-splitter - -apis weights status requested last update avg request 2XX 5XX -another-my-api 80 live 1 5m - - - -my-api 20 live 1 6m - - - - -last updated: 4m -endpoint: https://******.execute-api.eu-central-1.amazonaws.com/traffic-splitter -example curl: curl https://******.execute-api.eu-central-1.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json -... -``` - -## Making a prediction - -You can use `curl` to test your Traffic Splitter. 
This will distribute the requests across the Realtime APIs targeted by the Traffic Splitter: - -```bash -$ curl http://***.amazonaws.com/traffic-splitter \ - -X POST -H "Content-Type: application/json" \ - -d '{"key": "value"}' -``` - -## `cortex delete` - -Use `cortex delete ` to delete your Traffic Splitter: - -```bash -$ cortex delete traffic-splitter - -deleted traffic-splitter -``` - -Note that this will not delete the Realtime APIs targeted by the Traffic Splitter. diff --git a/docs/workloads/traffic-splitter/configuration.md b/docs/workloads/traffic-splitter/configuration.md new file mode 100644 index 0000000000..14eb46daf9 --- /dev/null +++ b/docs/workloads/traffic-splitter/configuration.md @@ -0,0 +1,14 @@ +# Configuration + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +```yaml +- name: # Traffic Splitter name (required) + kind: TrafficSplitter + networking: + endpoint: # the endpoint for the Traffic Splitter (default: ) + api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the API will still be accessible via the load balancer) (default: public, unless disabled cluster-wide) + apis: # list of Realtime APIs to target + - name: # name of a Realtime API that is already running or is included in the same configuration file (required) + weight: # percentage of traffic to route to the Realtime API (all weights must sum to 100) (required) +``` diff --git a/docs/tutorials/traffic-splitter.md b/docs/workloads/traffic-splitter/deploy.md similarity index 80% rename from docs/tutorials/traffic-splitter.md rename to docs/workloads/traffic-splitter/deploy.md index 1191ab6c91..01ce67ec97 100644 --- a/docs/tutorials/traffic-splitter.md +++ b/docs/workloads/traffic-splitter/deploy.md @@ -1,8 +1,10 @@ -# Traffic splitter +# TrafficSplitter -A Traffic Splitter can be used expose multiple APIs as a single endpoint. The percentage of traffic routed to each API can be controlled. This can be useful when performing A/B tests, setting up multi-armed bandits or performing canary deployments. +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -**Note: Traffic Splitter is only supported on a Cortex cluster** +Expose multiple RealtimeAPIs as a single endpoint for A/B tests, multi-armed bandits, or canary deployments. 
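For readers working from Python rather than `curl`, here is a hedged sketch of a client calling a Traffic Splitter endpoint; the endpoint URL and request body are placeholders, and each request is routed to one of the target Realtime APIs according to the configured weights.

```python
# Hedged sketch: calling a TrafficSplitter endpoint from Python, mirroring the curl
# example above. The endpoint URL and JSON body are placeholders; use the endpoint
# reported by `cortex get <traffic splitter name>`.
import requests

endpoint = "http://***.amazonaws.com/traffic-splitter"  # placeholder endpoint
response = requests.post(endpoint, json={"key": "value"})

# each request is routed to one of the target Realtime APIs according to its weight
print(response.status_code, response.json())
```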
+ +**Note: TrafficSplitter is only supported on a Cortex cluster** ## Deploy APIs diff --git a/pkg/workloads/cortex/client/setup.py b/pkg/workloads/cortex/client/setup.py index c2708cf34f..25253854da 100644 --- a/pkg/workloads/cortex/client/setup.py +++ b/pkg/workloads/cortex/client/setup.py @@ -115,8 +115,8 @@ def run(self): ], project_urls={ "Bug Reports": "https://github.com/cortexlabs/cortex/issues", - "Chat with us": "https://gitter.im/cortexlabs/cortex", - "Documentation": "https://docs.cortex.dev", + "Community": "https://gitter.im/cortexlabs/cortex", + "Docs": "https://docs.cortex.dev", "Source Code": "https://github.com/cortexlabs/cortex", }, ) From 7ecbd41c11d6626b9dd247f01c3068680d93fa1d Mon Sep 17 00:00:00 2001 From: Caleb Kaiser Date: Wed, 16 Dec 2020 12:21:15 -0500 Subject: [PATCH 05/11] 301 moved pages --- .gitbook.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.gitbook.yaml b/.gitbook.yaml index 1977d6450c..8e9f47bdb6 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -3,3 +3,29 @@ root: ./docs/ structure: readme: ./tutorials/realtime.md summary: summary.md + +redirects: + contact: ./summary.md + miscellaneous/security: ./running-on-aws/security + workloads/telemetry: ./cli/telemetry + guides/production: ./tutorials/realtime + deployments/compute: ./workloads/realtime/configuration + advanced/compute: ./running-on-aws/gpu + miscellaneous/environments: ./cli/environments + miscellaneous/cli: ./cli/commands + contributing/development: contributing + workloads/realtime-api: ./tutorials/realtime + deployments/realtime-api: ./tutorials/realtime + workloads/batch-api: ./tutorials/batch + guides/ssh-instance: ./running-on-aws/ssh + guides/windows-cli: ./cli/install + troubleshooting/cluster-down: ./summary.md + miscellaneous/python-client: ./cli/python-client + guides/kubectl-setup: ./guides/kubectl + */install: ./cli/install + */miscellaneous/cli: ./cli/install + */advanced/inferentia: ./running-on-aws/inferentia + v/master/advanced/networking: ./running-on-aws/networking + guides/single-node-deployment: ./tutorials/realtime + */advanced/gpus: ./running-on-aws/gpu + deployments/realtime-api/parallelism: ./workloads/realtime/parallelism From b94032b510dd3a5c1f73029b398ef6bafd0dde99 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Wed, 16 Dec 2020 16:15:23 -0800 Subject: [PATCH 06/11] Update .gitbook.yaml --- .gitbook.yaml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/.gitbook.yaml b/.gitbook.yaml index 8e9f47bdb6..1977d6450c 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -3,29 +3,3 @@ root: ./docs/ structure: readme: ./tutorials/realtime.md summary: summary.md - -redirects: - contact: ./summary.md - miscellaneous/security: ./running-on-aws/security - workloads/telemetry: ./cli/telemetry - guides/production: ./tutorials/realtime - deployments/compute: ./workloads/realtime/configuration - advanced/compute: ./running-on-aws/gpu - miscellaneous/environments: ./cli/environments - miscellaneous/cli: ./cli/commands - contributing/development: contributing - workloads/realtime-api: ./tutorials/realtime - deployments/realtime-api: ./tutorials/realtime - workloads/batch-api: ./tutorials/batch - guides/ssh-instance: ./running-on-aws/ssh - guides/windows-cli: ./cli/install - troubleshooting/cluster-down: ./summary.md - miscellaneous/python-client: ./cli/python-client - guides/kubectl-setup: ./guides/kubectl - */install: ./cli/install - */miscellaneous/cli: ./cli/install - */advanced/inferentia: 
-  v/master/advanced/networking: ./running-on-aws/networking
-  guides/single-node-deployment: ./tutorials/realtime
-  */advanced/gpus: ./running-on-aws/gpu
-  deployments/realtime-api/parallelism: ./workloads/realtime/parallelism
From 79a71c7631ca37637e94b1cfb220641d08ff77e3 Mon Sep 17 00:00:00 2001
From: Omer Spillinger
Date: Thu, 17 Dec 2020 11:31:07 -0800
Subject: [PATCH 07/11] Update docs

---
 docs/workloads/batch/deploy.md | 2 ++
 docs/workloads/multi-model/deploy.md | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/docs/workloads/batch/deploy.md b/docs/workloads/batch/deploy.md
index 104e7040e5..a26a12a344 100644
--- a/docs/workloads/batch/deploy.md
+++ b/docs/workloads/batch/deploy.md
@@ -1,5 +1,7 @@
 # BatchAPI
 
+_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
+
 Deploy batch APIs that can orchestrate distributed batch inference jobs on large datasets.
 
 ## Key features
diff --git a/docs/workloads/multi-model/deploy.md b/docs/workloads/multi-model/deploy.md
index 2d2248183d..ba7baa53da 100644
--- a/docs/workloads/multi-model/deploy.md
+++ b/docs/workloads/multi-model/deploy.md
@@ -1,5 +1,7 @@
 # Multi-model API
 
+_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
+
 Deploy several models in a single API to improve resource utilization efficiency.
 
 ### Define a multi-model API
From 3d126818a1125a71ecc2ff02606a2d431511a4ab Mon Sep 17 00:00:00 2001
From: Omer Spillinger
Date: Thu, 17 Dec 2020 11:38:48 -0800
Subject: [PATCH 08/11] Update .gitbook.yaml

---
 .gitbook.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitbook.yaml b/.gitbook.yaml
index 1977d6450c..0de5400f7f 100644
--- a/.gitbook.yaml
+++ b/.gitbook.yaml
@@ -1,5 +1,5 @@
 root: ./docs/
 
 structure:
-  readme: ./tutorials/realtime.md
+  readme: ./workloads/realtime/deploy.md
   summary: summary.md
From 7a2651de1f3c244824c72e84f1a049d948aee261 Mon Sep 17 00:00:00 2001
From: Omer Spillinger
Date: Thu, 17 Dec 2020 11:45:35 -0800
Subject: [PATCH 09/11] Update summary.md

---
 docs/summary.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/summary.md b/docs/summary.md
index 4dbb90e388..11c2624f32 100644
--- a/docs/summary.md
+++ b/docs/summary.md
@@ -51,6 +51,9 @@
   * [Configuration](workloads/batch/configuration.md)
   * [Statuses](workloads/batch/statuses.md)
   * [Endpoints](workloads/batch/endpoints.md)
+* Multi-model
+  * [Deploy](workloads/multi-model/deploy.md)
+  * [Configuration](workloads/multi-model/configuration.md)
 * Traffic Splitter
   * [Deploy](workloads/traffic-splitter/deploy.md)
   * [Python packages](workloads/traffic-splitter/configuration.md)
From 898f7bb914321282312ca045df3dfc37fb2a504b Mon Sep 17 00:00:00 2001
From: Omer Spillinger
Date: Thu, 17 Dec 2020 11:47:09 -0800
Subject: [PATCH 10/11] Update docs

---
 docs/workloads/multi-model/{multi-model.md => configuration.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/workloads/multi-model/{multi-model.md => configuration.md} (100%)

diff --git a/docs/workloads/multi-model/multi-model.md b/docs/workloads/multi-model/configuration.md
similarity index 100%
rename from docs/workloads/multi-model/multi-model.md
rename to docs/workloads/multi-model/configuration.md
From 593a15cc6e4ee0ee1064df024830a8b394693286 Mon Sep 17 00:00:00 2001
From: David Eliahu
Date: Thu, 17 Dec 2020 11:52:20 -0800
Subject: [PATCH 11/11] Update lint

---
 build/lint-docs.sh | 1 -
 build/lint.sh | 1 -
 2 files changed, 2 deletions(-)

diff --git a/build/lint-docs.sh b/build/lint-docs.sh
index ee493d4a9c..b7d353f7a3 100755
--- a/build/lint-docs.sh
+++ b/build/lint-docs.sh
@@ -32,7 +32,6 @@ fi
 output=$(cd "$ROOT/docs" && find . -type f \
 ! -path "./README.md" \
 ! -name "summary.md" \
-! -path "./tutorials/*" \
 ! -name "contributing.md" \
 -exec grep -L "WARNING: you are on the master branch, please refer to the docs on the branch that matches your \`cortex version\`" {} \;)
 if [[ $output ]]; then
diff --git a/build/lint.sh b/build/lint.sh
index 22516a22a2..f0028e92bf 100755
--- a/build/lint.sh
+++ b/build/lint.sh
@@ -142,7 +142,6 @@ else
   output=$(cd "$ROOT/docs" && find . -type f \
   ! -path "./README.md" \
   ! -name "summary.md" \
-  ! -path "./tutorials/*" \
   ! -name "contributing.md" \
   ! -name "*.json" \
   ! -name "*.txt" \
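
For readers skimming this patch series, here is a minimal sketch of a manifest that follows the schema added in `docs/workloads/traffic-splitter/configuration.md` above. The API names, endpoint, and the 80/20 split are illustrative placeholders, not part of the patch:

```yaml
# Hypothetical cortex.yaml: a TrafficSplitter canarying traffic between two RealtimeAPIs.
# All names, the endpoint, and the weights below are illustrative; weights must sum to 100.
- name: text-classifier
  kind: TrafficSplitter
  networking:
    endpoint: /text-classifier
  apis:
    - name: text-classifier-v1  # an already-deployed RealtimeAPI
      weight: 80
    - name: text-classifier-v2  # a candidate RealtimeAPI receiving canary traffic
      weight: 20
```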