feat: Added Python accuracy tests using Nox
Signed-off-by: Anurag Dixit <anuragd@nvidia.com>
Anurag Dixit committed Nov 30, 2021
1 parent 09afccb commit 6ae8652
Showing 3 changed files with 175 additions and 1 deletion.
21 changes: 21 additions & 0 deletions docker/dist-accuracy-test.sh
@@ -0,0 +1,21 @@
#!/bin/bash
set -o nounset
set -o errexit
set -o pipefail
set -e

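# Optional positional argument (not referenced anywhere else in this script)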
post=${1:-""}

# fetch bazel executable
BAZEL_VERSION=4.2.1
ARCH=$(uname -m)
if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi
wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel
chmod a+x /usr/bin/bazel
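# Disable TF32 math so FP32 accuracy comparisons are not skewed on Ampere+ GPUs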
export NVIDIA_TF32_OVERRIDE=0

cd /opt/pytorch/torch_tensorrt
cp /opt/pytorch/torch_tensorrt/docker/WORKSPACE.docker /opt/pytorch/torch_tensorrt/WORKSPACE

pip install --user --upgrade nox
nox
132 changes: 132 additions & 0 deletions noxfile.py
@@ -0,0 +1,132 @@
import nox
import os

# Use system installed Python packages
PYT_PATH='/opt/conda/lib/python3.8/site-packages'

# Root directory for torch_tensorrt. Set according to docker container by default
TOP_DIR='/opt/pytorch/torch_tensorrt'

# Download the dataset
@nox.session(python=["3"], reuse_venv=True)
def download_datasets(session):
    session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16'))
    session.run_always('wget', 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz')
    session.run_always('tar', '-xvzf', 'cifar-10-binary.tar.gz')
    session.run_always('mkdir', '-p',
                       os.path.join(TOP_DIR, 'tests/accuracy/datasets/data'))
    session.run_always('cp', '-rpf',
                       os.path.join(TOP_DIR, 'examples/int8/training/vgg16/cifar-10-batches-bin'),
                       os.path.join(TOP_DIR, 'tests/accuracy/datasets/data/cifar-10-batches-bin'),
                       external=True)

# Download the model
@nox.session(python=["3"], reuse_venv=True)
def download_models(session):
    session.install('timm')
    session.chdir('tests/modules')
    session.run_always('python',
                       'hub.py',
                       env={'PYTHONPATH': PYT_PATH})

# Train the model
@nox.session(python=["3"], reuse_venv=True)
def train_model(session):
    session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16'))
    session.run_always('python',
                       'main.py',
                       '--lr', '0.01',
                       '--batch-size', '128',
                       '--drop-ratio', '0.15',
                       '--ckpt-dir', 'vgg16_ckpts',
                       '--epochs', '25',
                       env={'PYTHONPATH': PYT_PATH})

    # Export model
    session.run_always('python',
                       'export_ckpt.py',
                       'vgg16_ckpts/ckpt_epoch25.pth',
                       env={'PYTHONPATH': PYT_PATH})

# Finetune the model
@nox.session(python=["3"], reuse_venv=True)
def finetune_model(session):
    # Install pytorch-quantization dependency
    session.install('pytorch-quantization', '--extra-index-url', 'https://pypi.ngc.nvidia.com')

    session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16'))
    session.run_always('python',
                       'finetune_qat.py',
                       '--lr', '0.01',
                       '--batch-size', '128',
                       '--drop-ratio', '0.15',
                       '--ckpt-dir', 'vgg16_ckpts',
                       '--start-from', '25',
                       '--epochs', '26',
                       env={'PYTHONPATH': PYT_PATH})

    # Export model
    session.run_always('python',
                       'export_qat.py',
                       'vgg16_ckpts/ckpt_epoch26.pth',
                       env={'PYTHONPATH': PYT_PATH})

# Run PTQ tests
@nox.session(python=["3"], reuse_venv=True)
def ptq_test(session):
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))
    session.run_always('cp', '-rf',
                       os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt'),
                       '.',
                       external=True)
    tests = [
        'test_ptq_dataloader_calibrator.py',
        'test_ptq_to_backend.py',
        'test_ptq_trt_calibrator.py'
    ]
    for test in tests:
        session.run_always('python', test,
                           env={'PYTHONPATH': PYT_PATH})

# Run QAT tests
@nox.session(python=["3"], reuse_venv=True)
def qat_test(session):
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))
    session.run_always('cp', '-rf',
                       os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt'),
                       '.',
                       external=True)

    session.run_always('python',
                       'test_qat_trt_accuracy.py',
                       env={'PYTHONPATH': PYT_PATH})

# Run Python API tests
@nox.session(python=["3"], reuse_venv=True)
def api_test(session):
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))
    tests = [
        "test_api.py",
        "test_to_backend_api.py"
    ]
    for test in tests:
        session.run_always('python',
                           test,
                           env={'PYTHONPATH': PYT_PATH})

# Clean up
@nox.session(reuse_venv=True)
def cleanup(session):
    target = [
        'examples/int8/training/vgg16/*.jit.pt',
        'examples/int8/training/vgg16/vgg16_ckpts',
        'examples/int8/training/vgg16/cifar-10-*',
        'examples/int8/training/vgg16/data',
        'tests/modules/*.jit.pt',
        'tests/py/*.jit.pt'
    ]

    target = ' '.join(os.path.join(TOP_DIR, i) for i in target)
    session.run_always('bash', '-c',
                       'rm -rf ' + target,
                       external=True)
23 changes: 22 additions & 1 deletion tests/README.md
@@ -1,6 +1,9 @@
# Tests

Right now there are two types of tests. Converter level tests and Module level tests.
Currently, the following types of tests are supported:
1. Converter level tests
2. Module level tests
3. Accuracy tests

The goal of the Converter tests is to test individual converters against specific subgraphs. The current tests in `core/converters` are good examples of how to write these tests. In general, every converter should have at least one test. More may be required if the operation has switches that change the behavior of the op.

@@ -20,6 +23,24 @@ bazel test //tests --compilation_mode=dbg --test_output=errors --jobs=4 --runs_p

`--jobs=4` is useful and is sometimes required to prevent too many processes from using GPU memory at once and causing CUDA out-of-memory issues.

Additionally, accuracy tests are supported for the Python backend using Nox. See [dist-accuracy-test.sh](../docker/dist-accuracy-test.sh) for an end-to-end reference.
```
# Run the complete set of Python accuracy and API tests
nox

# List the available Nox sessions
nox -l
```
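
Individual sessions can also be selected with `nox -s`, using the session names that `nox -l` reports. A minimal sketch, assuming the default session names generated from `noxfile.py`:
```
# Run only the Python API tests
nox -s api_test-3

# Run only the PTQ accuracy tests
nox -s ptq_test-3
```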

Note: The following Nox sessions are supported (see the example after this list for running them in sequence):
```
* download_datasets-3
* download_models-3
* train_model-3
* finetune_model-3
* ptq_test-3
* qat_test-3
* api_test-3
* cleanup
```
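
The sessions build on each other: training consumes the downloaded dataset, and the PTQ/QAT tests consume the exported models, so they should be run roughly in the order listed. Plain `nox` runs everything; the sketch below runs the pipeline step by step (session names assumed from `noxfile.py`):
```
# Step-by-step accuracy pipeline
nox -s download_datasets-3 download_models-3
nox -s train_model-3 finetune_model-3
nox -s ptq_test-3 qat_test-3 api_test-3
nox -s cleanup
```
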
### Testing using pre-built Torch-TensorRT library

Currently, the default strategy when we run all the tests (`bazel test //tests`) is to build the testing scripts along with the full Torch-TensorRT library (`libtorchtrt.so`) from scratch. This can lead to increased testing time and might not be needed if you already have a pre-built Torch-TensorRT library that you want to link against.