Skip to content

Commit 70b257c

Browse files
authored
ci/gpu: fix install future & use local cache (#16929)
1 parent 2f17d1b commit 70b257c

File tree

3 files changed

+11
-5
lines changed

3 files changed

+11
-5
lines changed

.azure/gpu-tests-fabric.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,11 +47,12 @@ jobs:
4747
variables:
4848
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
4949
FREEZE_REQUIREMENTS: "1"
50+
PIP_CACHE_DIR: "/var/tmp/pip"
5051
container:
5152
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
5253
# default shm size is 64m. Increase it to avoid:
5354
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
54-
options: "--gpus=all --shm-size=2gb"
55+
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
5556
strategy:
5657
matrix:
5758
'pkg: Fabric':

.azure/gpu-tests-pytorch.yml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ jobs:
5656
scope: ""
5757
PACKAGE_NAME: "pytorch"
5858
'PyTorch | future':
59-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
59+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch1.13-cuda11.7.1"
6060
scope: "future"
6161
PACKAGE_NAME: "pytorch"
6262
'Lightning | latest':
@@ -67,11 +67,12 @@ jobs:
6767
variables:
6868
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
6969
FREEZE_REQUIREMENTS: "1"
70+
PIP_CACHE_DIR: "/var/tmp/pip"
7071
container:
7172
image: $(image)
7273
# default shm size is 64m. Increase it to avoid:
7374
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
74-
options: "--gpus=all --shm-size=2gb"
75+
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
7576
workspace:
7677
clean: all
7778
steps:
@@ -114,8 +115,11 @@ jobs:
114115
displayName: 'Install package & extras'
115116

116117
- bash: |
118+
pip install -U -q pip
117119
pip uninstall -y torch torchvision
118-
pip install torch torchvision -U --pre --no-cache -f https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html
120+
pip install "torch==2.0.0" "torchvision==0.15.0" -U --pre \
121+
-f "https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html" \
122+
-f "https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM}/torch_nightly.html"
119123
python -c "from torch import __version__ as ver; assert ver.startswith('2.0.0'), ver"
120124
condition: eq(variables['scope'], 'future')
121125
displayName: 'bump to future'

.github/workflows/ci-dockers-pytorch.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ on:
88
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
99
paths:
1010
- ".actions/**"
11-
- ".github/workflows/ci-pytorch-dockers.yml"
11+
- ".github/workflows/ci-dockers-pytorch.yml"
1212
- "dockers/**"
1313
- "requirements/*.txt"
1414
- "requirements/pytorch/**"
@@ -105,6 +105,7 @@ jobs:
105105
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
106106
- {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
107107
- {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
108+
- {python_version: "3.10", pytorch_version: "1.13", cuda_version: "11.7.1"}
108109
steps:
109110
- uses: actions/checkout@v3
110111
- uses: docker/setup-buildx-action@v2

0 commit comments

Comments
 (0)