diff --git a/.github/workflows/build-cu116.properties.json b/.github/workflows/build-cu116.properties.json deleted file mode 100644 index 5f42933..0000000 --- a/.github/workflows/build-cu116.properties.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "name": "build-cu116", - "description": "Build wheels for Habitat with CUDA 1.16" -} diff --git a/.github/workflows/build-cu116.yaml b/.github/workflows/build-cu116.yaml deleted file mode 100644 index eb26fd4..0000000 --- a/.github/workflows/build-cu116.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: build-cu116 - -on: - workflow_dispatch: - -jobs: - whl-build-cu116: - runs-on: [self-hosted, cu116] - steps: - - name: Display host information - run: | - uname -a - hostname - pwd - id - ls -la - - - name: Fetch repository - uses: actions/checkout@v3 - - - name: Build Python3.7 wheel - run: | - /home/builduser/build_habitat.sh python3.7 py37 - - - name: Build Python3.8 wheel - run: | - /home/builduser/build_habitat.sh python3.8 py38 - - - name: Build Python3.9 wheel - run: | - /home/builduser/build_habitat.sh python3.9 py39 - - - name: Build Python3.10 wheel - run: | - /home/builduser/build_habitat.sh python3.10 py310 - - - name: Upload wheel artifact - uses: actions/upload-artifact@v3 - with: - name: wheel-cu116 - path: analyzer/dist/*.whl diff --git a/.github/workflows/test-workflow.properties.json b/.github/workflows/test-workflow.properties.json deleted file mode 100644 index 73f8133..0000000 --- a/.github/workflows/test-workflow.properties.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "name": "Test Workflow", - "description": "A test workflow." 
-} diff --git a/.github/workflows/test-workflow.yml b/.github/workflows/test-workflow.yml deleted file mode 100644 index 8bbd5eb..0000000 --- a/.github/workflows/test-workflow.yml +++ /dev/null @@ -1,181 +0,0 @@ -name: Test Workflow - -on: - workflow_dispatch: - -jobs: - whl-build-cu116: - runs-on: [self-hosted, cu116] - steps: - - name: Display host information - run: | - uname -a - hostname - pwd - id - ls -la - - - name: Fetch repository - uses: actions/checkout@v3 - - - name: Fetch submodules - run: git submodule update --init --recursive - - - name: Build - run: | - python3.9 -m virtualenv venv -p python3.9 - source venv/bin/activate - python -V - cd analyzer && ./install-dev.sh - - - name: Test import - run: | - cd analyzer - wget https://zenodo.org/record/4876277/files/habitat-models.tar.gz?download=1 -O habitat-models.tar.gz - ./extract-models.sh habitat-models.tar.gz - source ../venv/bin/activate && python -c "import habitat" - - - name: Generate wheel - run: | - source venv/bin/activate - python -m pip install -U wheel setuptools - cd analyzer - python setup.py sdist bdist_wheel - - - name: Upload wheel artifact - uses: actions/upload-artifact@v3 - with: - name: wheel-cu116 - path: analyzer/dist/*.whl - - whl-build-cu113: - runs-on: [self-hosted, cu113] - steps: - - name: Display host information - run: | - uname -a - hostname - pwd - id - ls -la - - - name: Fetch repository - uses: actions/checkout@v3 - - - name: Fetch submodules - run: git submodule update --init --recursive - - - name: Build - run: | - python3.9 -m virtualenv venv -p python3.9 - source venv/bin/activate - python -V - cd analyzer && ./install-dev.sh - - - name: Test import - run: | - cd analyzer - wget https://zenodo.org/record/4876277/files/habitat-models.tar.gz?download=1 -O habitat-models.tar.gz - ./extract-models.sh habitat-models.tar.gz - source ../venv/bin/activate && python -c "import habitat" - - - name: Generate wheel - run: | - source venv/bin/activate - python -m pip install 
-U wheel setuptools - cd analyzer - python setup.py sdist bdist_wheel - - - name: Upload wheel artifact - uses: actions/upload-artifact@v3 - with: - name: wheel-cu113 - path: analyzer/dist/*.whl - - whl-build-cu111: - runs-on: [self-hosted, cu111] - steps: - - name: Display host information - run: | - uname -a - hostname - pwd - id - ls -la - - - name: Fetch repository - uses: actions/checkout@v3 - - - name: Fetch submodules - run: git submodule update --init --recursive - - - name: Build - run: | - python3.9 -m virtualenv venv -p python3.9 - source venv/bin/activate - python -V - cd analyzer && ./install-dev.sh - - - name: Test import - run: | - cd analyzer - wget https://zenodo.org/record/4876277/files/habitat-models.tar.gz?download=1 -O habitat-models.tar.gz - ./extract-models.sh habitat-models.tar.gz - source ../venv/bin/activate && python -c "import habitat" - - - name: Generate wheel - run: | - source venv/bin/activate - python -m pip install -U wheel setuptools - cd analyzer - python setup.py sdist bdist_wheel - - - name: Upload wheel artifact - uses: actions/upload-artifact@v3 - with: - name: wheel-cu111 - path: analyzer/dist/*.whl - - whl-build-cu102: - runs-on: [self-hosted, cu102] - steps: - - name: Display host information - run: | - uname -a - hostname - pwd - id - ls -la - - - name: Fetch repository - uses: actions/checkout@v3 - - - name: Fetch submodules - run: git submodule update --init --recursive - - - name: Build - run: | - python3.9 -m virtualenv venv -p python3.9 - source venv/bin/activate - python -V - cd analyzer && ./install-dev.sh - - - name: Test import - run: | - cd analyzer - wget https://zenodo.org/record/4876277/files/habitat-models.tar.gz?download=1 -O habitat-models.tar.gz - ./extract-models.sh habitat-models.tar.gz - source ../venv/bin/activate && python -c "import habitat" - - - name: Generate wheel - run: | - source venv/bin/activate - python -m pip install -U wheel setuptools - cd analyzer - python setup.py sdist bdist_wheel - - 
- name: Upload wheel artifact - uses: actions/upload-artifact@v3 - with: - name: wheel-cu102 - path: analyzer/dist/*.whl diff --git a/.github/workflows/whl-build-all.properties.json b/.github/workflows/whl-build-all.properties.json new file mode 100644 index 0000000..118030b --- /dev/null +++ b/.github/workflows/whl-build-all.properties.json @@ -0,0 +1,4 @@ +{ + "name": "whl-build-all", + "description": "Build all wheels" +} diff --git a/.github/workflows/whl-build-all.yaml b/.github/workflows/whl-build-all.yaml new file mode 100644 index 0000000..d4c75d1 --- /dev/null +++ b/.github/workflows/whl-build-all.yaml @@ -0,0 +1,150 @@ +name: whl-build-all + +on: + workflow_dispatch: + +jobs: + whl-build-cu102: + runs-on: [self-hosted, cu102] + steps: + - name: Display host information + run: | + uname -a + hostname + pwd + id + ls -la + + - name: Fetch repository + uses: actions/checkout@v3 + + - name: Build Python3.7 wheel + run: | + /home/builduser/build_habitat.sh python3.7 py37 + + - name: Build Python3.8 wheel + run: | + /home/builduser/build_habitat.sh python3.8 py38 + + - name: Build Python3.9 wheel + run: | + /home/builduser/build_habitat.sh python3.9 py39 + + - name: Build Python3.10 wheel + run: | + /home/builduser/build_habitat.sh python3.10 py310 + + - name: Upload wheel artifact + uses: actions/upload-artifact@v3 + with: + name: wheels-cu102 + path: analyzer/dist/*.whl + + + whl-build-cu111: + runs-on: [self-hosted, cu111] + steps: + - name: Display host information + run: | + uname -a + hostname + pwd + id + ls -la + + - name: Fetch repository + uses: actions/checkout@v3 + + - name: Build Python3.7 wheel + run: | + /home/builduser/build_habitat.sh python3.7 py37 + + - name: Build Python3.8 wheel + run: | + /home/builduser/build_habitat.sh python3.8 py38 + + - name: Build Python3.9 wheel + run: | + /home/builduser/build_habitat.sh python3.9 py39 + + - name: Build Python3.10 wheel + run: | + /home/builduser/build_habitat.sh python3.10 py310 + + - name: Upload 
wheel artifact + uses: actions/upload-artifact@v3 + with: + name: wheels-cu111 + path: analyzer/dist/*.whl + + whl-build-cu113: + runs-on: [self-hosted, cu113] + steps: + - name: Display host information + run: | + uname -a + hostname + pwd + id + ls -la + + - name: Fetch repository + uses: actions/checkout@v3 + + - name: Build Python3.7 wheel + run: | + /home/builduser/build_habitat.sh python3.7 py37 + + - name: Build Python3.8 wheel + run: | + /home/builduser/build_habitat.sh python3.8 py38 + + - name: Build Python3.9 wheel + run: | + /home/builduser/build_habitat.sh python3.9 py39 + + - name: Build Python3.10 wheel + run: | + /home/builduser/build_habitat.sh python3.10 py310 + + - name: Upload wheel artifact + uses: actions/upload-artifact@v3 + with: + name: wheels-cu113 + path: analyzer/dist/*.whl + + whl-build-cu116: + runs-on: [self-hosted, cu116] + steps: + - name: Display host information + run: | + uname -a + hostname + pwd + id + ls -la + + - name: Fetch repository + uses: actions/checkout@v3 + + - name: Build Python3.7 wheel + run: | + /home/builduser/build_habitat.sh python3.7 py37 + + - name: Build Python3.8 wheel + run: | + /home/builduser/build_habitat.sh python3.8 py38 + + - name: Build Python3.9 wheel + run: | + /home/builduser/build_habitat.sh python3.9 py39 + + - name: Build Python3.10 wheel + run: | + /home/builduser/build_habitat.sh python3.10 py310 + + - name: Upload wheel artifact + uses: actions/upload-artifact@v3 + with: + name: wheels-cu116 + path: analyzer/dist/*.whl diff --git a/INSTALL.md b/INSTALL.md deleted file mode 100644 index 6f8d50b..0000000 --- a/INSTALL.md +++ /dev/null @@ -1,55 +0,0 @@ -# Installation -## CUDA Toolkit -Habitat depends on the CUDA toolkit, including the CUPTI examples. You can find a list of installers [here](https://developer.nvidia.com/cuda-toolkit-archive) from NVIDIA's website. - -After installation, verify that the folder `/usr/local/cuda/extras/CUPTI/samples` exists. 
On other distributions such as Arch Linux, this could also be located at `/opt/cuda/extras/CUPTI/samples`. - -## CMake -Habitat requires `cmake` versions 3.17 or above. To do so, consult `docker/Dockerfile` or run the following commands: -```sh -wget "https://github.com/Kitware/CMake/releases/download/v3.17.0-rc1/cmake-3.17.0-rc1.tar.gz" -O cmake-3.17.0-rc1.tar.gz -tar xzf cmake-3.17.0-rc1.tar.gz - -cd cmake-3.17.0-rc1 && \ - ./bootstrap && \ - make -j && \ - sudo make install -``` - -## Building Habitat -Change directory to `analyzer` and ensure that: -* the Python version in `SO_PATH` is set correctly (e.g. `habitat_cuda.cpython-39-x86_64-linux-gnu.so` for Python 3.9) -* the `CUPTI_PATH` variable is pointed to the CUPTI directory for your distribution - -Then, to begin building, run `./install-dev.sh`. - -## Download pretrained models -The MLP component of Habitat requires pretrained models that are not included in the main repository. To download them, run: -```sh -wget https://zenodo.org/record/4876277/files/habitat-models.tar.gz?download=1 -O habitat-models.tar.gz -./extract-models.sh habitat-models.tar.gz -``` - -## Verify installation -You can verify your Habitat installation by running the simple usage example: -```py -import habitat -import torch -import torchvision.models as models - -# Define model and sample inputs -model = models.resnet50().cuda() -image = torch.rand(8, 3, 224, 224).cuda() - -# Measure a single inference -tracker = habitat.OperationTracker(device=habitat.Device.RTX2080Ti) -with tracker.track(): - out = model(image) - -trace = tracker.get_tracked_trace() -print("Run time on source:", trace.run_time_ms) - -# Perform prediction to a single target device -pred = trace.to_device(habitat.Device.V100) -print("Predicted time on V100:", pred.run_time_ms) -``` \ No newline at end of file diff --git a/README.md b/README.md index b9807e3..c3e8d12 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,157 @@ -# Habitat: A Runtime-Based Computational 
Performance Predictor for Deep Neural Network Training +# Habitat -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4885489.svg)](https://doi.org/10.5281/zenodo.4885489) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4876277.svg)](https://doi.org/10.5281/zenodo.4876277) +[![License](https://img.shields.io/badge/license-Apache--2.0-green?style=flat)](https://github.com/CentML/habitat/blob/main/LICENSE) -Habitat is a tool that predicts a deep neural network's training iteration -execution time on a given GPU. It currently supports PyTorch. To learn more -about how Habitat works, please see our [research -paper](https://arxiv.org/abs/2102.00527). -## Running From Source +A Runtime-Based Computational Performance Predictor for Deep Neural Network Training -Currently, the only way to run Habitat is to build it from source. You should -use the Docker image provided in this repository to make sure that you can -compile the code. +- [Installation](#installation) +- [Building from source](#build) +- [Usage example](#getting-started) +- [Development Environment Setup](#dev-setup) +- [Release process](#release-process) +- [Release history](#release-history) +- [License](#license) +- [Research paper](#paper) +- [Contributing](#contributing) -1. Download the [Habitat pre-trained - models](https://doi.org/10.5281/zenodo.4876277). -2. Run `extract-models.sh` under `analyzer` to extract and install the - pre-trained models. -3. Run `setup.sh` under `docker/` to build the Habitat container image. -4. Run `start.sh` to start a new container. By default, your home directory - will be mounted inside the container under `~/home`. -5. Once inside the container, run `install-dev.sh` under `analyzer/` to build - and install the Habitat package. -6. In your scripts, `import habitat` to get access to Habitat. See - `experiments/run_experiment.py` for an example showing how to use Habitat. 
+Habitat is a tool that predicts a deep neural network's training iteration execution time on a given GPU. It currently supports PyTorch. To learn more about how Habitat works, please see our [research paper](https://arxiv.org/abs/2102.00527). -**Note:** Habitat needs access to your GPU's performance counters, which -requires special permissions if you are running with a recent driver (418.43 or -later). If you encounter a `CUPTI_ERROR_INSUFFICIENT_PRIVILEGES` error when -running Habitat, please follow the instructions -[here](https://developer.nvidia.com/ERR_NVGPUCTRPERM) -and in [issue #5](https://github.com/geoffxy/habitat/issues/5). +

<h2 id="installation">Installation</h2>

+To run Habitat, you need: +- [Python 3.6+](https://www.python.org/) +- [PyTorch 1.1.0+](https://pytorch.org/) +- A system equipped with an NVIDIA GPU. -## License +Currently, we have predictors for the following NVIDIA GPUs: + +| GPU | Generation | Memory | Mem. Type | SMs | +| ---------- |:-----------:| ------:| :-------: | :-: | +| P4000 | Pascal | 8 GB | GDDR5 | 14 | +| P100 | Pascal | 16 GB | HBM2 | 56 | +| V100 | Volta | 16 GB | HBM2 | 80 | +| 2070 | Turing | 8 GB | GDDR6 | 36 | +| 2080Ti | Turing | 11 GB | GDDR6 | 68 | +| T4 | Turing | 16 GB | GDDR6 | 40 | +| 3090 | Ampere | 24 GB | GDDR6X | 82 | + +**NOTE:** Installation via `pip` is not implemented yet +```zsh +python3 -m pip install habitat +python3 -c "import habitat" +``` + +

<h2 id="build">Building from source</h2>

+ +Prerequisites: +- A system equipped with an NVIDIA GPU with properly configured CUDA +- [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive) +- [cmake v3.17+](https://github.com/Kitware/CMake/releases) +- [Habitat pre-trained models](https://zenodo.org/record/4876277) + +```zsh +git clone https://github.com/CentML/habitat.git && cd habitat +git submodule init && git submodule update + +# Download the pre-trained models +cd analyzer +curl -L -o habitat-models.tar.gz https://zenodo.org/record/4876277/files/habitat-models.tar.gz\?download\=1 + +# Install the models +./extract-models.sh habitat-models.tar.gz +``` + +**Note:** Habitat needs access to your GPU's performance counters, which requires special permissions if you are running with a recent driver (418.43 or later). If you encounter a `CUPTI_ERROR_INSUFFICIENT_PRIVILEGES` error when running Habitat, please follow the instructions [here](https://developer.nvidia.com/ERR_NVGPUCTRPERM) and in [issue #5](https://github.com/geoffxy/habitat/issues/5). + +### Building with Docker + +Habitat has been tested to work on the latest version of [NVIDIA NGC PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). + +1. To build Habitat with Docker, first run the NGC container. +```bash +docker run --gpus all -it --rm nvcr.io/nvidia/pytorch:22.08-py3 +``` +2. Inside the container, clone the repository then build and install the Habitat Python package: +```bash +git clone --recursive https://github.com/centml/habitat +./habitat/analyzer/install-dev.sh +``` +3. Download and extract the pretrained models by following the download steps under [Building from source](#build) above. + +### Building without Docker + +1. Install CUPTI + +CUPTI is a profiling interface required by Habitat. Select the correct version of CUDA [here](https://developer.nvidia.com/cuda-toolkit-archive) and follow the instructions to add NVIDIA's repository. 
Then, install CUPTI with: +```bash +sudo apt-get install cuda-cupti-11-x +``` +where `11-x` represents the version of CUDA you have installed. + +2. Install `CMake` 3.17+. + +Follow these steps to download and install a precompiled version of CMake: +```bash +wget https://github.com/Kitware/CMake/releases/download/v3.24.0/cmake-3.24.0-linux-x86_64.sh +chmod +x cmake-3.24.0-linux-x86_64.sh +mkdir /opt/cmake +sh cmake-3.24.0-linux-x86_64.sh --prefix=/opt/cmake --skip-license +ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake +``` +You can verify the version of CMake you installed with the following: +```bash +cmake --version +``` +3. Build and install the Habitat Python package: +```bash +git clone --recursive https://github.com/centml/habitat +./habitat/analyzer/install-dev.sh +``` +4. Download and extract the pretrained models by following the download steps under [Building from source](#build) above. + +

<h2 id="getting-started">Usage example</h2>

+ +You can verify your Habitat installation by running the simple usage example: +```python +# example.py +import habitat +import torch +import torchvision.models as models + +# Define model and sample inputs +model = models.resnet50().cuda() +image = torch.rand(8, 3, 224, 224).cuda() + +# Measure a single inference +tracker = habitat.OperationTracker(device=habitat.Device.RTX2080Ti) +with tracker.track(): + out = model(image) + +trace = tracker.get_tracked_trace() +print("Run time on source:", trace.run_time_ms) + +# Perform prediction to a single target device +pred = trace.to_device(habitat.Device.V100) +print("Predicted time on V100:", pred.run_time_ms) +``` + +```zsh +python3 example.py +``` + +See [experiments/run_experiment.py](https://github.com/CentML/habitat/tree/main/experiments) for other examples of Habitat usage. + +

<h2 id="dev-setup">Development Environment Setup</h2>

+ +

<h2 id="release-process">Release Process</h2>

+ +

<h2 id="release-history">Release History</h2>

+ +See [Releases](https://github.com/UofT-EcoSystem/habitat/releases) + +

<h2 id="license">License</h2>

The code in this repository is licensed under the Apache 2.0 license (see `LICENSE` and `NOTICE`), with the exception of the files mentioned below. @@ -56,15 +172,11 @@ corresponding `README` files and license files inside the subdirectories for more information. -## Research Paper +

<h2 id="paper">Research Paper</h2>

-Habitat began as a research project in the [EcoSystem -Group](https://www.cs.toronto.edu/ecosystem) at the [University of -Toronto](https://cs.toronto.edu). The accompanying research paper will appear -in the proceedings of [USENIX +Habitat began as a research project in the [EcoSystem Group](https://www.cs.toronto.edu/ecosystem) at the [University of Toronto](https://cs.toronto.edu). The accompanying research paper appeared in the proceedings of [USENIX ATC'21](https://www.usenix.org/conference/atc21/presentation/yu). If you are -interested, you can read a preprint of the paper -[here](https://arxiv.org/abs/2102.00527). +interested, you can read a preprint of the paper [here](https://arxiv.org/abs/2102.00527). If you use Habitat in your research, please consider citing our paper: @@ -79,3 +191,7 @@ If you use Habitat in your research, please consider citing our paper: year = {2021}, } ``` +

<h2 id="contributing">Contributing</h2>

+ +Check out [CONTRIBUTING.md](https://github.com/CentML/habitat/blob/main/CONTRIBUTING.md) for more information on how to help with Habitat. +