From fd30f521246e441f9a070815215e12c6d5bb7d65 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Thu, 30 Jan 2025 05:36:22 +0000 Subject: [PATCH] migraphx ci uses rocm docker file --- .../linux-migraphx-ci-pipeline.yml | 21 +++-- .../migraphx-ci-pipeline-env.Dockerfile | 83 ------------------- 2 files changed, 12 insertions(+), 92 deletions(-) delete mode 100644 tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml index c6efd306ed43f..2c24702dac3b6 100644 --- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml @@ -37,7 +37,7 @@ variables: - name: render value: 109 - name: RocmVersion - value: 6.2.3 + value: 6.3.2 jobs: - job: Linux_Build @@ -59,10 +59,10 @@ jobs: - template: templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile + Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" - Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion) --build-arg USE_MIGRAPHX=1" + Repository: onnxruntime-migraphx-cibuild-rocm$(RocmVersion) - task: Cache@2 inputs: @@ -90,9 +90,10 @@ jobs: --volume $(CCACHE_DIR):/cache \ -e CCACHE_DIR=/cache \ --workdir /onnxruntime_src \ - onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) \ + onnxruntime-migraphx-cibuild-rocm$(RocmVersion) \ /bin/bash -c " set -ex; \ + source /ort/env/bin/activate; \ env; \ ccache -s; \ python tools/ci_build/build.py \ @@ -104,6 +105,7 @@ jobs: onnxruntime_USE_COMPOSABLE_KERNEL=OFF \ --mpi_home /opt/ompi \ --use_migraphx \ + --migraphx_home /opt/rocm \ --rocm_version=$(RocmVersion) \ --rocm_home /opt/rocm \ --nccl_home /opt/rocm \ @@ -158,10 +160,10 @@ jobs: - template: templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile + Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" - Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion) --build-arg USE_MIGRAPHX=1" + Repository: onnxruntime-migraphx-cibuild-rocm$(RocmVersion) - task: CmdLine@2 inputs: @@ -177,9 +179,10 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --workdir /build/Release \ - onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) \ + onnxruntime-migraphx-cibuild-rocm$(RocmVersion) \ /bin/bash -c " set -ex; \ + source /ort/env/bin/activate; \ cd /build/Release && xargs -a /build/Release/perms.txt chmod a+x; \ bash /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh" workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile deleted file mode 100644 index 51591e11ea2e9..0000000000000 --- a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile +++ /dev/null @@ -1,83 +0,0 @@ -# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete -FROM ubuntu:22.04 - -ARG ROCM_VERSION=6.2.3 -ARG AMDGPU_VERSION=${ROCM_VERSION} -ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' - -CMD ["/bin/bash"] - -RUN echo "$APT_PREF" > /etc/apt/preferences.d/rocm-pin-600 - -ENV DEBIAN_FRONTEND noninteractive - -RUN apt-get update && \ - apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg && \ - curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - &&\ - printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | tee /etc/apt/sources.list.d/rocm.list && \ - printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | tee /etc/apt/sources.list.d/amdgpu.list && \ - apt-get update && apt-get install -y --no-install-recommends \ - sudo \ - libelf1 \ - kmod \ - file \ - python3 \ - python3-pip \ - rocm-dev \ - rocm-libs \ - build-essential && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN groupadd -g 109 render - -# Upgrade to meet security requirements -RUN apt-get update -y && apt-get upgrade -y && apt-get autoremove -y && \ - apt-get install -y locales cifs-utils wget half libnuma-dev lsb-release && \ - apt-get clean -y - -ENV MIGRAPHX_DISABLE_FAST_GELU=1 -RUN locale-gen en_US.UTF-8 -RUN update-locale LANG=en_US.UTF-8 -ENV LC_ALL C.UTF-8 -ENV LANG C.UTF-8 - -WORKDIR /stage - -# Cmake -ENV CMAKE_VERSION=3.30.1 -RUN cd /usr/local && \ - wget -q https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && \ - tar -zxf /usr/local/cmake-3.30.1-Linux-x86_64.tar.gz --strip=1 -C /usr - -# ccache -RUN mkdir -p /tmp/ccache && \ - cd /tmp/ccache && \ - wget -q -O - https://github.com/ccache/ccache/releases/download/v4.7.4/ccache-4.7.4-linux-x86_64.tar.xz | tar --strip 1 -J -xf - && \ - cp /tmp/ccache/ccache /usr/bin && \ - rm -rf /tmp/ccache - -# Install Conda -ENV PATH /opt/miniconda/bin:${PATH} -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda && \ - conda init bash && \ - conda config --set auto_activate_base false && \ - conda update --all && \ - rm ~/miniconda.sh && conda clean -ya - -# Create migraphx-ci environment -ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci -ENV CONDA_DEFAULT_ENV migraphx-ci -RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.10 -ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH} - -# Enable migraphx-ci environment -SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"] - -# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found -RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6 - -# Install migraphx -RUN apt update && apt install -y migraphx - -RUN pip install numpy packaging ml_dtypes==0.5.0