Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
00ef571
Added Windows/MacOSX CI for py3.7 only (#1113)
vfdev-5 Jun 8, 2020
b4b2335
[FR] Parallel helper tools (#1014) (#1116)
vfdev-5 Jun 11, 2020
0e67f06
Fixes #1120 (#1122)
vfdev-5 Jun 11, 2020
4ab100e
reverse order of remove/save in Checkpoint handling (#1117)
erip Jun 12, 2020
6b88eb8
Fix test auto tpu (#1126)
vfdev-5 Jun 13, 2020
3b1dc3a
Auto pin_memory (#1129)
InCogNiTo124 Jun 14, 2020
db63e94
fix auto pin_memory : idist.device().type should be used (#1131)
sdesrozis Jun 15, 2020
f833124
Update pascal voc12 example (#1125)
vfdev-5 Jun 16, 2020
5837fe2
fix cifar10 model : num_classes missing (#1134)
sdesrozis Jun 16, 2020
ce00b72
Accuracy MultiLabel Handling and Error Message (#1132)
anmolsjoshi Jun 16, 2020
5477d86
Updated ImageNet example (#1138)
vfdev-5 Jun 18, 2020
a10bf6e
Updated pytorch-version-tests.yml to run cron every day at 00:00 UTC …
anmolsjoshi Jun 18, 2020
454e1a3
Added check_compute_fn argument to EpochMetric and related metrics (#…
anmolsjoshi Jun 19, 2020
04bdede
Docs cosmetics (#1142)
vfdev-5 Jun 19, 2020
676890b
Fix batch size calculation error (#1137)
InCogNiTo124 Jun 21, 2020
0ee7553
Docs updates (#1139)
vfdev-5 Jun 21, 2020
de4c80f
Fixes docs (#1147)
vfdev-5 Jun 21, 2020
f096742
Issue #1115 : pbar persists due to specific rule in tqdm (notebook) w…
sdesrozis Jun 22, 2020
6f51156
Updated codebase such that torch>=1.3 (#1150)
anmolsjoshi Jun 22, 2020
72db3bc
add wandb (#1152)
lavanyashukla Jun 23, 2020
e86ca81
Fixed typo and missing part of "Where to go next" (#1151)
vfdev-5 Jun 23, 2020
3827412
Fixes #1153 (#1154)
vfdev-5 Jun 23, 2020
f061557
Use global_step as priority, if it exists (#1155)
amatsukawa Jun 23, 2020
2d0f30a
Fix TrainsSaver handling of Checkpoint's n_saved (#1135)
jkhenning Jun 23, 2020
543ae1e
Stateful handlers (#1156)
amatsukawa Jun 24, 2020
ce3b66c
Bump version to 0.4rc.0.post1
vfdev-5 Jun 6, 2020
6efcd46
bump version to v0.4.0 🎉
erip Jun 24, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 62 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ run_pytorch_container: &run_pytorch_container
environment:
wd: << pipeline.parameters.workingdir >>
command: |
docker run --gpus=all --rm -itd -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image >>
docker run --gpus=all --rm -itd --shm-size 16G -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image >>
docker exec -it pthd nvidia-smi
docker exec -it pthd ls

Expand Down Expand Up @@ -80,7 +80,7 @@ jobs:

# pytest on cuda
export test_cmd='sh tests/run_gpu_tests.sh'
docker exec -it pthd /bin/bash -c "$test_cmd"
docker exec -it pthd /bin/bash -c "${test_cmd}"

# MNIST tests

Expand Down Expand Up @@ -118,7 +118,7 @@ jobs:
- run:
name: Codecov upload
command: |
codecov -F gpu || echo 'Codecov upload failed'
bash <(curl -s https://codecov.io/bash) -Z -F gpu


two_gpus_tests:
Expand All @@ -135,7 +135,64 @@ jobs:
name: Run 1 Node 2 GPUs Unit Tests
command: |
export test_cmd='sh tests/run_gpu_tests.sh 2'
docker exec -it pthd /bin/bash -c "$test_cmd"
docker exec -it pthd /bin/bash -c "${test_cmd}"

- run:
name: Codecov upload
command: |
bash <(curl -s https://codecov.io/bash) -Z -F gpu-2


two_gpus_check_dist_cifar10_example:
<<: *two_gpus

working_directory: << pipeline.parameters.workingdir >>

steps:
- checkout
- <<: *pull_pytorch_stable_image
- <<: *run_pytorch_container
- <<: *install_dependencies
- run:
name: "Install additional example dependencies"
command: |
docker exec -it pthd pip install fire
- run:
name: "Run without backend"
command: |
export example_path="examples/contrib/cifar10"
# initial run
export stop_cmd="--stop_iteration=500"
export test_cmd="CI=1 python ${example_path}/main.py run"
docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
# resume
export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-None-1_stop-on-500/training_checkpoint_400.pt"
docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"

- run:
name: "Run with NCCL backend using torch dist launch"
command: |
export example_path="examples/contrib/cifar10"
# initial run
export stop_cmd="--stop_iteration=500"
export test_cmd="CI=1 python -u -m torch.distributed.launch --nproc_per_node=2 --use_env ${example_path}/main.py run --backend=nccl"
docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
# resume
export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt"
docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"

- run:
name: "Run with NCCL backend using spawn"
command: |
export example_path="examples/contrib/cifar10"
# initial run
export stop_cmd="--stop_iteration=500"
export test_cmd="CI=1 python -u ${example_path}/main.py run --backend=nccl --nproc_per_node=2"
docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
# resume
export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt"
docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"


# -------------------------------------------------------------------------------------
# Workflows
Expand All @@ -146,3 +203,4 @@ workflows:
jobs:
- one_gpu_tests
- two_gpus_tests
- two_gpus_check_dist_cifar10_example
12 changes: 3 additions & 9 deletions .github/workflows/pytorch-version-tests.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
on:
schedule:
# Run at 00:00 UTC on Sunday
- cron: '0 0 * * 0'
# Run at 00:00 UTC every day
- cron: '0 0 * * *'

jobs:
build:
Expand All @@ -11,16 +11,10 @@ jobs:
fail-fast: false
matrix:
python-version: [3.5, 3.6, 3.7, 3.8]
pytorch-version: [1.4.0, 1.3.1, 1.2.0, 1.1.0, 1.0.1]
pytorch-version: [1.4.0, 1.3.1]
exclude:
- pytorch-version: 1.3.1
python-version: 3.8
- pytorch-version: 1.2.0
python-version: 3.8
- pytorch-version: 1.1.0
python-version: 3.8
- pytorch-version: 1.0.1
python-version: 3.8

steps:
- uses: actions/checkout@v2
Expand Down
73 changes: 41 additions & 32 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,63 @@ on: [push, pull_request]

jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
max-parallel: 10
fail-fast: false
matrix:
os: [ubuntu-latest, ]
python-version: [3.5, 3.6, 3.7, 3.8]
pytorch-channel: [pytorch, pytorch-nightly]
exclude:
# excludes the pytorch-nightly / python 3.5 combination, as it was dropped
- pytorch-channel: pytorch-nightly
python-version: 3.5
include:
# includes a single build on Windows
- os: windows-latest
pytorch-channel: pytorch
python-version: 3.7
skip-distrib-tests: 1
# includes a single build on macOS
- os: macos-latest
pytorch-channel: pytorch
python-version: 3.7
skip-distrib-tests: 1

steps:
- uses: actions/checkout@v2
- name: Setup Conda
run: |
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda config --set always_yes yes --set changeps1 no
conda update -q conda
# Useful for debugging any issues with conda
conda info -a
conda create -q -n test-environment pytorch cpuonly python=${{ matrix.python-version }} -c ${{ matrix.pytorch-channel }}

- name: Setup Miniconda
uses: goanpeca/setup-miniconda@v1
with:
miniconda-version: "latest"
python-version: ${{ matrix.python-version }}

- name: Install dependencies
shell: bash -l {0}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# Keep this fix in case of problems with torchvision nightly releases
# if [[ "${{ matrix.pytorch-channel }}" == "pytorch-nightly" ]]; then pip install --upgrade git+https://github.com/pytorch/vision.git; else conda install torchvision cpuonly python=${{ matrix.python-version }} -c ${{ matrix.pytorch-channel }}; fi
conda install torchvision cpuonly python=${{ matrix.python-version }} -c ${{ matrix.pytorch-channel }}
conda install pytorch torchvision cpuonly -c ${{ matrix.pytorch-channel }}
pip install -r requirements-dev.txt
# Fixes #1153
pip install --upgrade scipy==1.4.1
python setup.py install

- name: Run Tests
shell: bash -l {0}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh
SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh

- name: Run MNIST Examples
shell: bash -l {0}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# MNIST
# 1) mnist.py
python examples/mnist/mnist.py --epochs=1

- name: Run MNIST with loggers Examples
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
# 2) mnist_with_visdom.py
python -c "from visdom.server import download_scripts; download_scripts()" # download scripts : https://github.com/facebookresearch/visdom/blob/master/py/server.py#L929
python -m visdom.server &
Expand All @@ -63,40 +72,40 @@ jobs:
python examples/mnist/mnist_with_tensorboard.py --epochs=1

- name: Run MNIST Example With Crash
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
continue-on-error: true
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# 4) mnist_save_resume_engine.py
python examples/mnist/mnist_save_resume_engine.py --epochs=2 --crash_iteration 1100

- name: Resume MNIST from previous crash
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt

- name: Run GAN example
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# DCGAN
python examples/gan/dcgan.py --dataset fake --dataroot /tmp/fakedata --output-dir /tmp/outputs-dcgan --batch-size 2 --epochs 2 --workers 0

- name: Run RL Examples
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# RL
# 1) Actor-Critic
python examples/reinforcement_learning/actor_critic.py --max-episodes=2
# 2) Reinforce
python examples/reinforcement_learning/reinforce.py --max-episodes=2

- name: Run Neural Style Example
shell: bash -l {0}
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source activate test-environment
# fast-neural-style
# train
mkdir -p ~/.cache/torch/checkpoints/ && wget "https://download.pytorch.org/models/vgg16-397923af.pth" -O ~/.cache/torch/checkpoints/vgg16-397923af.pth
Expand Down
Loading