diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 58247119ab..07aec6270c 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -1,6 +1,9 @@ name: Run Regression Tests on CPU -on: workflow_dispatch +on: + # runs every Sunday at 11:15am + schedule: + - cron: '15 11 * * 7' jobs: regression-cpu: diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml index 6a16fce8f7..eb6ab812a2 100644 --- a/.github/workflows/regression_tests_gpu.yml +++ b/.github/workflows/regression_tests_gpu.yml @@ -1,6 +1,9 @@ name: Run Regression Tests on GPU -on: workflow_dispatch +on: + # runs every Sunday at 11:15am + schedule: + - cron: '15 11 * * 7' jobs: regression-gpu: @@ -9,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - cuda: ["cu116", "cu117"] + cuda: ["cu117", "cu118"] steps: - name: Clean up previous run run: | diff --git a/docker/README.md b/docker/README.md index 33a8172ea5..fc65749532 100644 --- a/docker/README.md +++ b/docker/README.md @@ -36,7 +36,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p |-g, --gpu|Build image with GPU based ubuntu base image| |-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, codebuild| |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.| -|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu117`| +|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`. Default `cu117`| |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.| |--codebuild| Set if you need [AWS CodeBuild](https://aws.amazon.com/codebuild/)| |-py, --pythonversion| Specify the python version to use. Supported values `3.8`, `3.9`, `3.10`. 
Default `3.9`| diff --git a/docker/build_image.sh b/docker/build_image.sh index f16b787dd2..246f81bdac 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -80,7 +80,10 @@ do # With default ubuntu version 20.04 -cv|--cudaversion) CUDA_VERSION="$2" - if [ $CUDA_VERSION == "cu117" ]; + if [ $CUDA_VERSION == "cu118" ]; + then + BASE_IMAGE="nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04" + elif [ $CUDA_VERSION == "cu117" ]; then BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04" elif [ $CUDA_VERSION == "cu116" ]; diff --git a/docs/README.md b/docs/README.md index 355a6e0268..8055e661ff 100644 --- a/docs/README.md +++ b/docs/README.md @@ -52,3 +52,4 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [TorchServe on Kubernetes](https://github.com/pytorch/serve/blob/master/kubernetes/README.md#torchserve-on-kubernetes) - Demonstrates a Torchserve deployment in Kubernetes using Helm Chart supported in both Azure Kubernetes Service and Google Kubernetes service * [mlflow-torchserve](https://github.com/mlflow/mlflow-torchserve) - Deploy mlflow pipeline models into TorchServe * [Kubeflow pipelines](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/pytorch-samples) - Kubeflow pipelines and Google Vertex AI Managed pipelines +* [NVIDIA MPS](mps.md) - Use NVIDIA MPS to optimize multi-worker deployment on a single GPU diff --git a/docs/code_coverage.md b/docs/code_coverage.md index 6f8a746bc3..bda87f1d0f 100644 --- a/docs/code_coverage.md +++ b/docs/code_coverage.md @@ -12,7 +12,7 @@ ```bash python ts_scripts/install_dependencies.py --environment=dev --cuda=cu102 ``` - > Supported cuda versions as cu117, cu116, cu113, cu111, cu102, cu101, cu92 + > Supported cuda versions as cu118, cu117, cu116, cu113, cu111, cu102, cu101, cu92 - Execute sanity suite ```bash diff --git a/docs/configuration.md b/docs/configuration.md index 167bf8da2c..5d190f8ac1 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -288,7 +288,7 @@ the backend workers convert "Bytearray to utf-8 string" when the Content-Type of * `max_request_size` : The maximum allowable request size that the Torchserve accepts, in bytes. Default: 6553500 * `max_response_size` : The maximum allowable response size that the Torchserve sends, in bytes. Default: 6553500 * `limit_max_image_pixels` : Default value is true (Use default [PIL.Image.MAX_IMAGE_PIXELS](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS)). If this is set to "false", set PIL.Image.MAX_IMAGE_PIXELS = None in backend default vision handler for large image payload. -* `allowed_urls` : Comma separated regex of allowed source URL(s) from where models can be registered. Default: "file://.*|http(s)?://.*" (all URLs and local file system) +* `allowed_urls` : Comma separated regex of allowed source URL(s) from where models can be registered. Default: `file://.*|http(s)?://.*` (all URLs and local file system) e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.pytorch.org/` use the following regex string `allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*` * `workflow_store` : Path of workflow store directory. Defaults to model store directory. * `disable_system_metrics` : Disable collection of system metrics when set to "true". Default value is "false". 
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 029b67471e..11daf6db7b 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -14,7 +14,7 @@
 python ./ts_scripts/install_dependencies.py
 ```

- - For GPU with Cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`
+ - For GPU with Cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`

 ```bash
 python ./ts_scripts/install_dependencies.py --cuda=cu102
diff --git a/docs/images/mps_g4_single.png b/docs/images/mps_g4_single.png
new file mode 100644
index 0000000000..a35b79abd2
Binary files /dev/null and b/docs/images/mps_g4_single.png differ
diff --git a/docs/images/mps_g4_two_worker.png b/docs/images/mps_g4_two_worker.png
new file mode 100644
index 0000000000..7dcbf53bec
Binary files /dev/null and b/docs/images/mps_g4_two_worker.png differ
diff --git a/docs/images/mps_p3_single.png b/docs/images/mps_p3_single.png
new file mode 100644
index 0000000000..be1b88f9d2
Binary files /dev/null and b/docs/images/mps_p3_single.png differ
diff --git a/docs/images/mps_p3_two_worker.png b/docs/images/mps_p3_two_worker.png
new file mode 100644
index 0000000000..06339999d8
Binary files /dev/null and b/docs/images/mps_p3_two_worker.png differ
diff --git a/docs/mps.md b/docs/mps.md
new file mode 100644
index 0000000000..70cd1f93d2
--- /dev/null
+++ b/docs/mps.md
@@ -0,0 +1,91 @@
+# Enabling NVIDIA MPS in TorchServe
+In order to deploy ML models, TorchServe spins up each worker in a separate process, thus isolating each worker from the others.
+Each process creates its own CUDA context to execute its kernels and access the allocated memory.
+
+While NVIDIA GPUs in their default setting allow multiple processes to run CUDA kernels on a single device, this comes with the following drawbacks:
+* The execution of the kernels is generally serialized
+* Each process creates its own CUDA context, which occupies additional GPU memory
+
+For these scenarios NVIDIA offers the Multi-Process Service (MPS), which:
+* Allows multiple processes to share the same CUDA context on the same GPU
+* Runs their kernels in parallel
+
+This can result in:
+* Increased performance when using multiple workers on the same GPU
+* Decreased GPU memory utilization due to the shared context
+
+
+To leverage the benefits of NVIDIA MPS, we need to start the MPS daemon with the following commands before starting up TorchServe itself.
+```
+sudo nvidia-smi -c 3
+nvidia-cuda-mps-control -d
+```
+The first command enables exclusive processing mode for the GPU, allowing only one process (the MPS daemon) to utilize it.
+The second command starts the MPS daemon itself.
+To shut down the daemon we can execute:
+```
+echo quit | nvidia-cuda-mps-control
+```
+For more details on MPS please refer to [NVIDIA's MPS documentation](https://docs.nvidia.com/deploy/mps/index.html).
+It should be noted that, due to limited hardware resources, MPS only allows 48 processes (for Volta GPUs) to connect to the daemon.
+Adding more clients/workers (to the same GPU) will lead to a failure.
+
+## Benchmarks
+To show the performance of TorchServe with MPS activated, and to help you decide whether or not to enable MPS for your deployment, we perform some benchmarks with representative workloads.
+
+Primarily, we want to investigate how the throughput of a worker evolves with MPS activated at different operating points.
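+To make it easy to switch MPS on and off between runs, the control commands shown above can be wrapped around each benchmark invocation. A minimal sketch is shown below; the TorchServe start-up and benchmark commands are placeholders to be replaced by your own setup:
+```bash
+#!/bin/bash
+# Sketch: run one benchmark configuration with the MPS daemon active.
+set -e
+
+sudo nvidia-smi -c 3                  # put the GPU into exclusive process mode
+nvidia-cuda-mps-control -d            # start the MPS daemon
+
+# start TorchServe and run the benchmark here, e.g.:
+# python benchmark-ab.py --config config.json
+
+echo quit | nvidia-cuda-mps-control   # shut the MPS daemon down again
+sudo nvidia-smi -c 0                  # restore the default compute mode
+```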
+As an example workload for our benchmark we select the [HuggingFace Transformers Sequence Classification example](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers#sequence-classification).
+We perform the benchmark on a g4dn.4xlarge as well as a p3.2xlarge instance on AWS.
+Both instance types provide one GPU per instance, so multiple workers will be scheduled on the same GPU.
+For the benchmark we concentrate on the model throughput as measured by the [benchmark-ab.py](https://github.com/pytorch/serve/tree/master/benchmarks/benchmark-ab.py) tool.
+
+First, we measure the throughput of a single worker for different batch sizes, as this shows us at which point the compute resources of the GPU are fully occupied.
+Second, we measure the throughput with two deployed workers for the batch sizes where we expect the GPU to still have some resources left over to share.
+For each benchmark we perform five runs and take the median over the runs.
+
+We use the following config.json for the benchmark, only overwriting the number of workers and the batch size accordingly.
+
+```
+{
+  "url":"/home/ubuntu/serve/examples/Huggingface_Transformers/model_store/BERTSeqClassification",
+  "requests": 10000,
+  "concurrency": 600,
+  "input": "/home/ubuntu/serve/examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt",
+  "workers": "1"
+}
+```
+Please note that we set the concurrency level to 600, which makes sure that the batch aggregation inside TorchServe fills the batches up to the maximum batch size. However, this also skews the latency measurements, as many requests will be waiting in the queue to be processed. We will therefore neglect the latency measurements in the following.
+
+### G4 Instance
+We first perform the single worker benchmark for the G4 instance.
+In the figure below we see a steady increase of the throughput up to a batch size of four.
+
+![G4 benchmark, single worker](images/mps_g4_single.png)
+
+Next, we increase the number of workers to two in order to compare the throughput with and without MPS running.
+To enable MPS for the second set of runs, we first set the exclusive processing mode for the GPU and then start the MPS daemon as shown above.
+
+We select batch sizes between one and eight according to our previous findings.
+In the figure we can see that the throughput can be better for batch sizes 1 and 8 (up to +18%), while it can be worse for others (-11%).
+An interpretation of this result could be that the G4 instance does not have many resources to share when we run a BERT model in one of the workers.
+
+![G4 benchmark, two workers](images/mps_g4_two_worker.png)
+
+### P3 Instance
+Next, we run the same experiment with the bigger p3.2xlarge instance.
+With a single worker we get the following throughput values:
+
+![P3 benchmark, single worker](images/mps_p3_single.png)
+
+We can see that the throughput increases steadily, but for batch sizes over eight we see diminishing returns.
+Finally, we deploy two workers on the P3 instance and compare running them with and without MPS.
+We can see that for batch sizes between 1 and 32 the throughput is consistently higher with MPS enabled (up to +25%), with the exception of batch size 16.
+
+![P3 benchmark, two workers](images/mps_p3_two_worker.png)
+
+## Summary
+In the previous section we saw that enabling MPS for two workers running the same model yields mixed results.
+For the smaller G4 instance we only saw benefits in certain operation points while we saw more consistent improvements for the bigger P3 instance. +This suggests that the benefit in terms of throughput for running a deployment with MPS are highly workload and environment dependent and need to be determined for specific situations using appropriate benchmarks and tools. +It should be noted that the previous benchmark solely focused on throughput and neglected latency and memory footprint. +As using MPS will only create a single CUDA context more workers can be packed to the same GPU which needs to be considered as well in the according scenarios. diff --git a/model-archiver/model_archiver/tests/integ_tests/test_integration_model_archiver.py b/model-archiver/model_archiver/tests/integ_tests/test_integration_model_archiver.py index 89b4a907ba..c68a3a45c9 100644 --- a/model-archiver/model_archiver/tests/integ_tests/test_integration_model_archiver.py +++ b/model-archiver/model_archiver/tests/integ_tests/test_integration_model_archiver.py @@ -1,16 +1,23 @@ -import platform -import time -from datetime import datetime import errno import json import os +import platform import shutil -import tempfile import subprocess +import tempfile +import time +from datetime import datetime +from pathlib import Path + import model_archiver DEFAULT_RUNTIME = "python" MANIFEST_FILE = "MAR-INF/MANIFEST.json" +INTEG_TEST_CONFIG_FILE = "integ_tests/configuration.json" +DEFAULT_HANDLER_CONFIG_FILE = "integ_tests/default_handler_configuration.json" + +TEST_ROOT_DIR = Path(__file__).parents[1] +MODEL_ARCHIVER_ROOT_DIR = Path(__file__).parents[3] def create_file_path(path): @@ -49,11 +56,17 @@ def run_test(test, cmd): def validate_archive_exists(test): fmt = test.get("archive-format") if fmt == "tgz": - assert os.path.isfile(os.path.join(test.get("export-path"), test.get("model-name")+".tar.gz")) + assert os.path.isfile( + os.path.join(test.get("export-path"), test.get("model-name") + ".tar.gz") + ) elif fmt == "no-archive": - assert os.path.isdir(os.path.join(test.get("export-path"), test.get("model-name"))) + assert os.path.isdir( + os.path.join(test.get("export-path"), test.get("model-name")) + ) else: - assert os.path.isfile(os.path.join(test.get("export-path"), test.get("model-name")+".mar")) + assert os.path.isfile( + os.path.join(test.get("export-path"), test.get("model-name") + ".mar") + ) def validate_manifest_file(manifest, test, default_handler=None): @@ -67,7 +80,9 @@ def validate_manifest_file(manifest, test, default_handler=None): assert manifest.get("runtime") == test.get("runtime") assert manifest.get("model").get("modelName") == test.get("model-name") if not default_handler: - assert manifest.get("model").get("handler") == test.get("handler").split("/")[-1] + assert ( + manifest.get("model").get("handler") == test.get("handler").split("/")[-1] + ) else: assert manifest.get("model").get("handler") == test.get("handler") assert manifest.get("archiverVersion") == model_archiver.__version__ @@ -87,21 +102,29 @@ def validate_files(file_list, prefix, default_handler=None): def validate_tar_archive(test_cfg): import tarfile - file_name = os.path.join(test_cfg.get("export-path"), test_cfg.get("model-name") + ".tar.gz") + + file_name = os.path.join( + test_cfg.get("export-path"), test_cfg.get("model-name") + ".tar.gz" + ) f = tarfile.open(file_name, "r:gz") - manifest = json.loads(f.extractfile(os.path.join(test_cfg.get("model-name"), MANIFEST_FILE)).read()) + manifest = json.loads( + 
f.extractfile(os.path.join(test_cfg.get("model-name"), MANIFEST_FILE)).read() + ) validate_manifest_file(manifest, test_cfg) validate_files(f.getnames(), test_cfg.get("model-name")) def validate_noarchive_archive(test): - file_name = os.path.join(test.get("export-path"), test.get("model-name"), MANIFEST_FILE) + file_name = os.path.join( + test.get("export-path"), test.get("model-name"), MANIFEST_FILE + ) manifest = json.loads(open(file_name).read()) validate_manifest_file(manifest, test) def validate_mar_archive(test): import zipfile + file_name = os.path.join(test.get("export-path"), test.get("model-name") + ".mar") zf = zipfile.ZipFile(file_name, "r") manifest = json.loads(zf.open(MANIFEST_FILE).read()) @@ -124,8 +147,17 @@ def validate(test): def build_cmd(test): - args = ['model-name', 'model-file', 'serialized-file', 'handler', 'extra-files', 'archive-format', - 'version', 'export-path', 'runtime'] + args = [ + "model-name", + "model-file", + "serialized-file", + "handler", + "extra-files", + "archive-format", + "version", + "export-path", + "runtime", + ] cmd = ["torch-model-archiver"] @@ -136,19 +168,42 @@ def build_cmd(test): return " ".join(cmd) +def make_paths_absolute(test, keys): + def make_absolute(paths): + if "," in paths: + return ",".join([make_absolute(p) for p in paths.split(",")]) + return MODEL_ARCHIVER_ROOT_DIR.joinpath(paths).as_posix() + + for k in keys: + test[k] = make_absolute(test[k]) + + return test + + def test_model_archiver(): - with open("model_archiver/tests/integ_tests/configuration.json", "r") as f: + with open(TEST_ROOT_DIR.joinpath(INTEG_TEST_CONFIG_FILE), "r") as f: tests = json.loads(f.read()) + keys = ( + "model-file", + "serialized-file", + "handler", + "extra-files", + ) + tests = [make_paths_absolute(t, keys) for t in tests] for test in tests: # tar.gz format problem on windows hence ignore - if platform.system() == "Windows" and test['archive-format'] == 'tgz': + if platform.system() == "Windows" and test["archive-format"] == "tgz": continue try: - test["export-path"] = os.path.join(tempfile.gettempdir(), test["export-path"]) + test["export-path"] = os.path.join( + tempfile.gettempdir(), test["export-path"] + ) delete_file_path(test.get("export-path")) create_file_path(test.get("export-path")) test["runtime"] = test.get("runtime", DEFAULT_RUNTIME) - test["model-name"] = test["model-name"] + '_' + str(int(time.time()*1000.0)) + test["model-name"] = ( + test["model-name"] + "_" + str(int(time.time() * 1000.0)) + ) cmd = build_cmd(test) if test.get("force"): cmd += " -f" @@ -160,8 +215,14 @@ def test_model_archiver(): def test_default_handlers(): - with open("model_archiver/tests/integ_tests/default_handler_configuration.json", "r") as f: + with open(TEST_ROOT_DIR.joinpath(DEFAULT_HANDLER_CONFIG_FILE), "r") as f: tests = json.loads(f.read()) + keys = ( + "model-file", + "serialized-file", + "extra-files", + ) + tests = [make_paths_absolute(t, keys) for t in tests] for test in tests: cmd = build_cmd(test) try: diff --git a/model-archiver/model_archiver/tests/unit_tests/test_version.py b/model-archiver/model_archiver/tests/unit_tests/test_version.py index 9571155338..d72e20d66f 100644 --- a/model-archiver/model_archiver/tests/unit_tests/test_version.py +++ b/model-archiver/model_archiver/tests/unit_tests/test_version.py @@ -1,15 +1,16 @@ +from pathlib import Path - -import os import model_archiver +MODEL_ARCHIVER_ROOT_DIR = Path(__file__).parent.parent.parent + def test_model_export_tool_version(): """ Test the model archive version :return: """ - 
with open(os.path.join('model_archiver', 'version.txt')) as f: + with open(MODEL_ARCHIVER_ROOT_DIR.joinpath("version.txt")) as f: __version__ = f.readline().strip() assert __version__ == str(model_archiver.__version__), "Versions do not match" diff --git a/requirements/torch_cu117_linux.txt b/requirements/torch_cu117_linux.txt index 083c68ecff..4840f5b08d 100644 --- a/requirements/torch_cu117_linux.txt +++ b/requirements/torch_cu117_linux.txt @@ -3,7 +3,7 @@ cython wheel pillow==9.3.0 -torch==1.13.1+cu117; sys_platform == 'linux' -torchvision==0.14.1+cu117; sys_platform == 'linux' -torchtext==0.14.1; sys_platform == 'linux' -torchaudio==0.13.1+cu117; sys_platform == 'linux' +torch==2.0.0+cu117; sys_platform == 'linux' +torchvision==0.15.1+cu117; sys_platform == 'linux' +torchtext==0.15.1; sys_platform == 'linux' +torchaudio==2.0.1+cu117; sys_platform == 'linux' diff --git a/requirements/torch_cu117_windows.txt b/requirements/torch_cu117_windows.txt index c51f59d627..32e07cf15a 100644 --- a/requirements/torch_cu117_windows.txt +++ b/requirements/torch_cu117_windows.txt @@ -1,6 +1,6 @@ #pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 --extra-index-url https://download.pytorch.org/whl/cu117 -torch==1.13.1+cu117; sys_platform == 'win32' -torchvision==0.14.1+cu117; sys_platform == 'win32' -torchtext==0.14.1; sys_platform == 'win32' -torchaudio==0.13.1+cu117; sys_platform == 'win32' +torch==2.0.0+cu117; sys_platform == 'win32' +torchvision==0.15.1+cu117; sys_platform == 'win32' +torchtext==0.15.1; sys_platform == 'win32' +torchaudio==2.0.1+cu117; sys_platform == 'win32' diff --git a/requirements/torch_cu118_linux.txt b/requirements/torch_cu118_linux.txt new file mode 100644 index 0000000000..03e50bcece --- /dev/null +++ b/requirements/torch_cu118_linux.txt @@ -0,0 +1,9 @@ +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 +--extra-index-url https://download.pytorch.org/whl/cu118 +cython +wheel +pillow==9.3.0 +torch==2.0.0+cu118; sys_platform == 'linux' +torchvision==0.15.1+cu118; sys_platform == 'linux' +torchtext==0.15.1; sys_platform == 'linux' +torchaudio==2.0.1+cu118; sys_platform == 'linux' diff --git a/requirements/torch_cu118_windows.txt b/requirements/torch_cu118_windows.txt new file mode 100644 index 0000000000..03e50bcece --- /dev/null +++ b/requirements/torch_cu118_windows.txt @@ -0,0 +1,9 @@ +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 +--extra-index-url https://download.pytorch.org/whl/cu118 +cython +wheel +pillow==9.3.0 +torch==2.0.0+cu118; sys_platform == 'linux' +torchvision==0.15.1+cu118; sys_platform == 'linux' +torchtext==0.15.1; sys_platform == 'linux' +torchaudio==2.0.1+cu118; sys_platform == 'linux' diff --git a/requirements/torch_darwin.txt b/requirements/torch_darwin.txt index 9cc30c8440..6b0cfdc1d0 100644 --- a/requirements/torch_darwin.txt +++ b/requirements/torch_darwin.txt @@ -1,5 +1,5 @@ #pip install torch torchvision torchaudio -torch==1.13.1; sys_platform == 'darwin' -torchvision==0.14.1; sys_platform == 'darwin' -torchtext==0.14.1; sys_platform == 'darwin' -torchaudio==0.13.1; sys_platform == 'darwin' +torch==2.0.0; sys_platform == 'darwin' +torchvision==0.15.1; sys_platform == 'darwin' +torchtext==0.15.1; sys_platform == 'darwin' +torchaudio==2.0.1; sys_platform == 'darwin' diff --git a/requirements/torch_linux.txt b/requirements/torch_linux.txt index 116ce16686..c47ebbe080 100644 --- 
a/requirements/torch_linux.txt +++ b/requirements/torch_linux.txt @@ -3,7 +3,7 @@ cython wheel pillow==9.3.0 -torch==1.13.1+cpu; sys_platform == 'linux' -torchvision==0.14.1+cpu; sys_platform == 'linux' -torchtext==0.14.1; sys_platform == 'linux' -torchaudio==0.13.1+cpu; sys_platform == 'linux' +torch==2.0.0+cpu; sys_platform == 'linux' +torchvision==0.15.1+cpu; sys_platform == 'linux' +torchtext==0.15.1; sys_platform == 'linux' +torchaudio==2.0.1+cpu; sys_platform == 'linux' diff --git a/requirements/torch_windows.txt b/requirements/torch_windows.txt index 7552033ba3..2a744482b1 100644 --- a/requirements/torch_windows.txt +++ b/requirements/torch_windows.txt @@ -1,7 +1,7 @@ #pip install torch torchvision torchaudio wheel -torch==1.13.1; sys_platform == 'win32' -torchvision==0.14.1; sys_platform == 'win32' -torchtext==0.14.1; sys_platform == 'win32' -torchaudio==0.13.1; sys_platform == 'win32' +torch==2.0.0; sys_platform == 'win32' +torchvision==0.15.1; sys_platform == 'win32' +torchtext==0.15.1; sys_platform == 'win32' +torchaudio==2.0.1; sys_platform == 'win32' pillow==9.3.0 diff --git a/test/pytest/conftest.py b/test/pytest/conftest.py index 7a72a0f34d..6b16b5a6e8 100644 --- a/test/pytest/conftest.py +++ b/test/pytest/conftest.py @@ -13,6 +13,7 @@ collect_ignore = [] collect_ignore.append("test_example_torchrec_dlrm.py") collect_ignore.append("test_example_near_real_time_video.py") +collect_ignore.append("test_dali_preprocess.py") @pytest.fixture(scope="module") diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index 86a7a9755b..d922fa2440 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -181,7 +181,7 @@ def get_brew_version(): parser.add_argument( "--cuda", default=None, - choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117"], + choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117", "cu118"], help="CUDA version for torch", ) parser.add_argument( diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index e348891c4f..8bfc5adeb9 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -1027,3 +1027,5 @@ Snyk pythonversion StreamPredictions LLMs +MPS +mps
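With `cu118` added as an option to `ts_scripts/install_dependencies.py` and the new `requirements/torch_cu118_*.txt` files, a quick sanity check of the new dependency path (assuming a machine with CUDA 11.8 drivers; the second line only prints the installed torch build) is:
```bash
python ./ts_scripts/install_dependencies.py --cuda=cu118
python -c "import torch; print(torch.__version__, torch.version.cuda)"
```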