diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index d3cee07609c..755d51f69c6 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -2,10 +2,10 @@ echo -e "\n Install Neural Compressor ... " cd /neural-compressor -if [[ $1 = *"3x_pt" ]]; then +if [[ $1 = *"3x_pt"* ]]; then python -m pip install --no-cache-dir -r requirements_pt.txt python setup.py pt bdist_wheel - pip install dist/neural_compressor*.whl --force-reinstall + pip install --no-deps dist/neural_compressor*.whl --force-reinstall elif [[ $1 = *"3x_tf"* ]]; then python -m pip install --no-cache-dir -r requirements_tf.txt python setup.py tf bdist_wheel diff --git a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh index 386ec397c81..03f4fd02dbf 100644 --- a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh +++ b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh @@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor git fetch git checkout master rm -rf build dist *egg-info -echo y | pip uninstall neural_compressor_${1} +binary_index="${1%_fp8}" +echo y | pip uninstall neural_compressor_${binary_index} cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1} coverage erase diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 new file mode 100644 index 00000000000..f1bf27d8da3 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 @@ -0,0 +1,15 @@ +[run] +branch = True + +[report] +include = + */neural_compressor/torch/algorithms/habana_fp8/* + */neural_compressor/torch/amp/* +exclude_lines = + pragma: no cover + raise NotImplementedError + raise TypeError + if self.device == "gpu": + if device == "gpu": + except ImportError: + except Exception as e: \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh index b91bc182c7c..5c5637765fa 100644 --- a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh @@ -15,6 +15,7 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__ cd /neural-compressor/test/3x || exit 1 rm -rf tensorflow rm -rf onnxrt +rm -rf torch/algorithms/fp8_quant LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh new file mode 100644 index 00000000000..d2aef0c3045 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh @@ -0,0 +1,35 @@ +#!/bin/bash +python -c "import neural_compressor as nc" +test_case="run 3x Torch Habana FP8" +echo "${test_case}" + +# install requirements +echo "set up UT env..." +sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt +pip install -r /neural-compressor/test/3x/torch/requirements.txt +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0 +pip install pytest-cov +pip install pytest-html +pip list + +export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 +inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])') +cd /neural-compressor/test/3x || exit 1 + +LOG_DIR=/neural-compressor/log_dir +mkdir -p ${LOG_DIR} +ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log +pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a ${ut_log_name} + +cp report.html ${LOG_DIR}/ + +if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then + echo "Find errors in pytest case, please check the output..." + echo "Please search for '== FAILURES ==' or '== ERRORS =='" + exit 1 +fi + +# if ut pass, collect the coverage file into artifacts +cp .coverage ${LOG_DIR}/.coverage + +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index e7b563bcea7..9e98d31e6b9 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -16,6 +16,9 @@ parameters: - name: repo type: string default: "https://github.com/intel/neural-compressor" + - name: imageSource + type: string + default: "build" steps: - task: Bash@3 @@ -24,7 +27,7 @@ steps: script: | docker ps -a if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then - docker start $(docker ps -aq) + docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}") echo "remove left files through container ..." docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true" fi @@ -57,19 +60,25 @@ steps: git checkout master displayName: "Checkout out master" - - script: | - if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then - docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . - fi - docker images | grep -i ${{ parameters.repoName }} - if [[ $? -ne 0 ]]; then - echo "NO Such Repo" - exit 1 - fi - displayName: "Build develop docker image" + - ${{ if eq(parameters.imageSource, 'build') }}: + - script: | + if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . + fi + docker images | grep -i ${{ parameters.repoName }} + if [[ $? -ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - ${{ if eq(parameters.imageSource, 'pull') }}: + - script: | + docker pull vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest + displayName: "Pull habana docker image" - script: | - docker stop $(docker ps -aq) + docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}") docker rm -vf ${{ parameters.containerName }} || true env | sort displayName: "Clean docker container" @@ -79,8 +88,15 @@ steps: inputs: targetType: "inline" script: | - docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ - -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }} + if [[ "${{ parameters.imageSource }}" == "build" ]]; then + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \ + ${{ parameters.repoName }}:${{ parameters.repoTag }} + else + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest + fi echo "Show the container list after docker run ... " docker ps -a displayName: "Docker run - ${{ parameters.containerName }} Container" diff --git a/.azure-pipelines/template/ut-template.yml b/.azure-pipelines/template/ut-template.yml index b7fecacd3d7..d8908d22a35 100644 --- a/.azure-pipelines/template/ut-template.yml +++ b/.azure-pipelines/template/ut-template.yml @@ -17,6 +17,9 @@ parameters: - name: utContainerName type: string default: "utTest" + - name: imageSource + type: string + default: "build" steps: - template: docker-template.yml @@ -27,6 +30,7 @@ steps: dockerFileName: "Dockerfile" containerName: ${{ parameters.utContainerName }} repo: ${{ parameters.repo }} + imageSource: ${{ parameters.imageSource }} - script: | docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \ diff --git a/.azure-pipelines/ut-3x-pt-fp8.yml b/.azure-pipelines/ut-3x-pt-fp8.yml new file mode 100644 index 00000000000..4fa440fea8d --- /dev/null +++ b/.azure-pipelines/ut-3x-pt-fp8.yml @@ -0,0 +1,106 @@ +trigger: none + +pr: + autoCancel: true + drafts: false + branches: + include: + - master + paths: + include: + - neural_compressor/common + - setup.py + - requirements_pt.txt + - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh + +pool: GAUDI + +variables: + IMAGE_NAME: "neural-compressor" + IMAGE_TAG: "py310" + UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir + DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir + ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8" + REPO: $(Build.Repository.Uri) + +stages: + - stage: Torch_habana + displayName: Torch 3x Habana FP8 + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" + dockerConfigName: "commonDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Torch_habana_baseline + displayName: Torch 3x Habana FP8 baseline + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 baseline + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" + dockerConfigName: "gitCloneDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Coverage + displayName: "Coverage Compare" + pool: + vmImage: "ubuntu-latest" + dependsOn: [Torch_habana, Torch_habana_baseline] + jobs: + - job: CollectDatafiles + steps: + - script: | + if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} . + fi + docker images | grep -i ${IMAGE_NAME} + if [[ $? -ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - task: DownloadPipelineArtifact@2 + inputs: + artifact: + patterns: '*_coverage/.coverage' + path: $(DOWNLOAD_PATH) + + - script: | + echo "--- create container ---" + docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash + echo "--- docker ps ---" + docker ps + echo "--- collect logs ---" + docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \ + && bash install_nc.sh 3x_pt_fp8 \ + && bash ut/3x/collect_log_3x.sh 3x_pt_fp8" + displayName: "Collect UT Coverage" + + - task: PublishPipelineArtifact@1 + condition: succeededOrFailed() + inputs: + targetPath: $(UPLOAD_PATH) + artifact: $(ARTIFACT_NAME) + publishLocation: "pipeline" + + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true" + displayName: "Docker clean up" diff --git a/neural_compressor/torch/utils/environ.py b/neural_compressor/torch/utils/environ.py index 0697979996d..6eb2f849a93 100644 --- a/neural_compressor/torch/utils/environ.py +++ b/neural_compressor/torch/utils/environ.py @@ -46,6 +46,7 @@ def is_package_available(package_name): ## check hpex if is_package_available("habana_frameworks"): _hpex_available = True + import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _hpex_available = False diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py new file mode 100644 index 00000000000..98ca06222a5 --- /dev/null +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -0,0 +1,56 @@ +import os +import sys +import time + +import habana_frameworks.torch.core as htcore +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + + def forward(self, x): + out = x.view(-1, 28 * 28) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + out = F.log_softmax(out, dim=1) + return out + + +def test_hpu(): + model = Net() + model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" + model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) + checkpoint = torch.load(model_path) + model.load_state_dict(checkpoint) + + model = model.eval() + + model = model.to("hpu") + + transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) + + data_path = "./data" + test_kwargs = {"batch_size": 32} + dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) + test_loader = torch.utils.data.DataLoader(dataset1, **test_kwargs) + + correct = 0 + for batch_idx, (data, label) in enumerate(test_loader): + data = data.to("hpu") + output = model(data) + htcore.mark_step() + correct += output.max(1)[1].eq(label).sum() + + accuracy = 100.0 * correct / (len(test_loader) * 32) + assert accuracy > 90