diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index ce171708..a0e2f018 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -11,27 +11,27 @@ jobs:
       linux_64_mpi_typeconda:
         CONFIG: linux_64_mpi_typeconda
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-cuda:10.2
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
       linux_64_mpi_typeexternal:
         CONFIG: linux_64_mpi_typeexternal
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-cuda:10.2
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
       linux_aarch64_mpi_typeconda:
         CONFIG: linux_aarch64_mpi_typeconda
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-aarch64-cuda:11.0
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-aarch64-cuda:11.2
       linux_aarch64_mpi_typeexternal:
         CONFIG: linux_aarch64_mpi_typeexternal
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-aarch64-cuda:11.0
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-aarch64-cuda:11.2
       linux_ppc64le_mpi_typeconda:
         CONFIG: linux_ppc64le_mpi_typeconda
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-ppc64le-cuda:11.0
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2
       linux_ppc64le_mpi_typeexternal:
         CONFIG: linux_ppc64le_mpi_typeexternal
         UPLOAD_PACKAGES: 'True'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-ppc64le-cuda:11.0
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2
   timeoutInMinutes: 360
 
   steps:
@@ -45,6 +45,9 @@ jobs:
 
   - script: |
         export CI=azure
+        export flow_run_id=azure_$(Build.BuildNumber).$(System.JobAttempt)
+        export remote_url=$(Build.Repository.Uri)
+        export sha=$(Build.SourceVersion)
         export GIT_BRANCH=$BUILD_SOURCEBRANCHNAME
         export FEEDSTOCK_NAME=$(basename ${BUILD_REPOSITORY_NAME})
         if [[ "${BUILD_REASON:-}" == "PullRequest" ]]; then
diff --git a/.azure-pipelines/azure-pipelines-osx.yml b/.azure-pipelines/azure-pipelines-osx.yml
index 8032e967..f3ff0633 100755
--- a/.azure-pipelines/azure-pipelines-osx.yml
+++ b/.azure-pipelines/azure-pipelines-osx.yml
@@ -20,6 +20,9 @@ jobs:
   # TODO: Fast finish on azure pipelines?
   - script: |
       export CI=azure
+      export flow_run_id=azure_$(Build.BuildNumber).$(System.JobAttempt)
+      export remote_url=$(Build.Repository.Uri)
+      export sha=$(Build.SourceVersion)
       export OSX_FORCE_SDK_DOWNLOAD="1"
       export GIT_BRANCH=$BUILD_SOURCEBRANCHNAME
       export FEEDSTOCK_NAME=$(basename ${BUILD_REPOSITORY_NAME})
diff --git a/.ci_support/linux_64_mpi_typeconda.yaml b/.ci_support/linux_64_mpi_typeconda.yaml
index 7ca0c399..d3d55a8a 100644
--- a/.ci_support/linux_64_mpi_typeconda.yaml
+++ b/.ci_support/linux_64_mpi_typeconda.yaml
@@ -8,20 +8,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '10.2'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-cos7-cuda:10.2
+- quay.io/condaforge/linux-anvil-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - conda
 target_platform:
diff --git a/.ci_support/linux_64_mpi_typeexternal.yaml b/.ci_support/linux_64_mpi_typeexternal.yaml
index 7ce8c92b..c2000102 100644
--- a/.ci_support/linux_64_mpi_typeexternal.yaml
+++ b/.ci_support/linux_64_mpi_typeexternal.yaml
@@ -8,20 +8,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '10.2'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-cos7-cuda:10.2
+- quay.io/condaforge/linux-anvil-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - external
 target_platform:
diff --git a/.ci_support/linux_aarch64_mpi_typeconda.yaml b/.ci_support/linux_aarch64_mpi_typeconda.yaml
index 33462f5b..8e4d450f 100644
--- a/.ci_support/linux_aarch64_mpi_typeconda.yaml
+++ b/.ci_support/linux_aarch64_mpi_typeconda.yaml
@@ -12,20 +12,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '11.0'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-aarch64-cuda:11.0
+- quay.io/condaforge/linux-anvil-aarch64-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - conda
 target_platform:
diff --git a/.ci_support/linux_aarch64_mpi_typeexternal.yaml b/.ci_support/linux_aarch64_mpi_typeexternal.yaml
index c1358d42..52b4b19b 100644
--- a/.ci_support/linux_aarch64_mpi_typeexternal.yaml
+++ b/.ci_support/linux_aarch64_mpi_typeexternal.yaml
@@ -12,20 +12,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '11.0'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-aarch64-cuda:11.0
+- quay.io/condaforge/linux-anvil-aarch64-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - external
 target_platform:
diff --git a/.ci_support/linux_ppc64le_mpi_typeconda.yaml b/.ci_support/linux_ppc64le_mpi_typeconda.yaml
index 48143ba6..415145ab 100644
--- a/.ci_support/linux_ppc64le_mpi_typeconda.yaml
+++ b/.ci_support/linux_ppc64le_mpi_typeconda.yaml
@@ -8,20 +8,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '11.0'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-ppc64le-cuda:11.0
+- quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - conda
 target_platform:
diff --git a/.ci_support/linux_ppc64le_mpi_typeexternal.yaml b/.ci_support/linux_ppc64le_mpi_typeexternal.yaml
index 01daf9f5..b5e15b8d 100644
--- a/.ci_support/linux_ppc64le_mpi_typeexternal.yaml
+++ b/.ci_support/linux_ppc64le_mpi_typeexternal.yaml
@@ -8,20 +8,24 @@ channel_sources:
 - conda-forge
 channel_targets:
 - conda-forge main
-cudatoolkit:
-- '11.0'
+cuda_version:
+- '11.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
 - '12'
 docker_image:
-- quay.io/condaforge/linux-anvil-ppc64le-cuda:11.0
+- quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2
 enable_cuda:
 - 'True'
+enable_ucx:
+- 'True'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 mpi_type:
 - external
 target_platform:
diff --git a/.ci_support/migrations/ucx1150.yaml b/.ci_support/migrations/ucx1150.yaml
deleted file mode 100644
index d23514a6..00000000
--- a/.ci_support/migrations/ucx1150.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-__migrator:
-  build_number: 1
-  kind: version
-  migration_number: 1
-ucx:
-- '1.15.0'
-migrator_ts: 1696558865
diff --git a/.ci_support/osx_64_.yaml b/.ci_support/osx_64_.yaml
index 5af651f1..5c59be27 100644
--- a/.ci_support/osx_64_.yaml
+++ b/.ci_support/osx_64_.yaml
@@ -14,10 +14,14 @@ cxx_compiler_version:
 - '16'
 enable_cuda:
 - 'False'
+enable_ucx:
+- 'False'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 macos_machine:
 - x86_64-apple-darwin13.4.0
 mpi_type:
diff --git a/.ci_support/osx_arm64_.yaml b/.ci_support/osx_arm64_.yaml
index c2c21350..8b6b7604 100644
--- a/.ci_support/osx_arm64_.yaml
+++ b/.ci_support/osx_arm64_.yaml
@@ -14,10 +14,14 @@ cxx_compiler_version:
 - '16'
 enable_cuda:
 - 'False'
+enable_ucx:
+- 'False'
 fortran_compiler:
 - gfortran
 fortran_compiler_version:
 - '12'
+libevent:
+- 2.1.12
 macos_machine:
 - arm64-apple-darwin20.0.0
 mpi_type:
diff --git a/.gitignore b/.gitignore
index c89ecb7d..c0029107 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,13 @@
-*.pyc
+# User content belongs under recipe/.
+# Feedstock configuration goes in `conda-forge.yml`
+# Everything else is managed by the conda-smithy rerender process.
+# Please do not modify
+
+*
+!/conda-forge.yml
 
-build_artifacts
+!/*/
+!/recipe/**
+!/.ci_support/**
+
+*.pyc
diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh
index bd9e6710..eba1dfdb 100755
--- a/.scripts/build_steps.sh
+++ b/.scripts/build_steps.sh
@@ -28,13 +28,15 @@ conda-build:
 pkgs_dirs:
   - ${FEEDSTOCK_ROOT}/build_artifacts/pkg_cache
   - /opt/conda/pkgs
+solver: libmamba
 
 CONDARC
+export CONDA_LIBMAMBA_SOLVER_NO_CHANNELS_FROM_INSTALLED=1
 
 mamba install --update-specs --yes --quiet --channel conda-forge --strict-channel-priority \
-    pip mamba conda-build boa conda-forge-ci-setup=3
+    pip mamba conda-build boa conda-forge-ci-setup=4
 mamba update --update-specs --yes --quiet --channel conda-forge --strict-channel-priority \
-    pip mamba conda-build boa conda-forge-ci-setup=3
+    pip mamba conda-build boa conda-forge-ci-setup=4
 
 # set up the condarc
 setup_conda_rc "${FEEDSTOCK_ROOT}" "${RECIPE_ROOT}" "${CONFIG_FILE}"
@@ -55,6 +57,12 @@ if [[ -f "${FEEDSTOCK_ROOT}/LICENSE.txt" ]]; then
   cp "${FEEDSTOCK_ROOT}/LICENSE.txt" "${RECIPE_ROOT}/recipe-scripts-license.txt"
 fi
 
+if [[ "${sha:-}" == "" ]]; then
+  pushd ${FEEDSTOCK_ROOT}
+  sha=$(git rev-parse HEAD)
+  popd
+fi
+
 if [[ "${BUILD_WITH_CONDA_DEBUG:-0}" == 1 ]]; then
     if [[ "x${BUILD_OUTPUT_ID:-}" != "x" ]]; then
         EXTRA_CB_OPTIONS="${EXTRA_CB_OPTIONS:-} --output-id ${BUILD_OUTPUT_ID}"
@@ -68,7 +76,8 @@ if [[ "${BUILD_WITH_CONDA_DEBUG:-0}" == 1 ]]; then
 else
     conda mambabuild "${RECIPE_ROOT}" -m "${CI_SUPPORT}/${CONFIG}.yaml" \
         --suppress-variables ${EXTRA_CB_OPTIONS:-} \
-        --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml"
+        --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml" \
+        --extra-meta flow_run_id="${flow_run_id:-}" remote_url="${remote_url:-}" sha="${sha:-}"
     ( startgroup "Validating outputs" ) 2> /dev/null
 
     validate_recipe_outputs "${FEEDSTOCK_NAME}"
diff --git a/.scripts/run_docker_build.sh b/.scripts/run_docker_build.sh
index 92362398..b70ef014 100755
--- a/.scripts/run_docker_build.sh
+++ b/.scripts/run_docker_build.sh
@@ -91,6 +91,9 @@ docker run ${DOCKER_RUN_ARGS} \
            -e CPU_COUNT \
            -e BUILD_WITH_CONDA_DEBUG \
            -e BUILD_OUTPUT_ID \
+           -e flow_run_id \
+           -e remote_url \
+           -e sha \
            -e BINSTAR_TOKEN \
            -e FEEDSTOCK_TOKEN \
            -e STAGING_BINSTAR_TOKEN \
diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh
index 870c49aa..9259eb93 100755
--- a/.scripts/run_osx_build.sh
+++ b/.scripts/run_osx_build.sh
@@ -22,11 +22,13 @@ bash $MINIFORGE_FILE -b -p ${MINIFORGE_HOME}
 
 source ${MINIFORGE_HOME}/etc/profile.d/conda.sh
 conda activate base
+export CONDA_SOLVER="libmamba"
+export CONDA_LIBMAMBA_SOLVER_NO_CHANNELS_FROM_INSTALLED=1
 
 mamba install --update-specs --quiet --yes --channel conda-forge --strict-channel-priority \
-    pip mamba conda-build boa conda-forge-ci-setup=3
+    pip mamba conda-build boa conda-forge-ci-setup=4
 mamba update --update-specs --yes --quiet --channel conda-forge --strict-channel-priority \
-    pip mamba conda-build boa conda-forge-ci-setup=3
+    pip mamba conda-build boa conda-forge-ci-setup=4
 
 
 
@@ -45,6 +47,10 @@ else
   echo -e "\n\nNot mangling homebrew as we are not running in CI"
 fi
 
+if [[ "${sha:-}" == "" ]]; then
+  sha=$(git rev-parse HEAD)
+fi
+
 echo -e "\n\nRunning the build setup script."
 source run_conda_forge_build_setup
 
@@ -77,7 +83,8 @@ else
 
     conda mambabuild ./recipe -m ./.ci_support/${CONFIG}.yaml \
         --suppress-variables ${EXTRA_CB_OPTIONS:-} \
-        --clobber-file ./.ci_support/clobber_${CONFIG}.yaml
+        --clobber-file ./.ci_support/clobber_${CONFIG}.yaml \
+        --extra-meta flow_run_id="$flow_run_id" remote_url="$remote_url" sha="$sha"
     ( startgroup "Validating outputs" ) 2> /dev/null
 
     validate_recipe_outputs "${FEEDSTOCK_NAME}"
diff --git a/README.md b/README.md
index 9c2b12dd..4da7973f 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,7 @@ available continuous integration services. Thanks to the awesome service provide
 [CircleCI](https://circleci.com/), [AppVeyor](https://www.appveyor.com/),
 [Drone](https://cloud.drone.io/welcome), and [TravisCI](https://travis-ci.com/)
 it is possible to build and upload installable packages to the
-[conda-forge](https://anaconda.org/conda-forge) [Anaconda-Cloud](https://anaconda.org/)
+[conda-forge](https://anaconda.org/conda-forge) [anaconda.org](https://anaconda.org/)
 channel for Linux, Windows and OSX respectively.
 
 To manage the continuous integration and simplify feedstock maintenance
diff --git a/recipe/build-mpi.sh b/recipe/build-mpi.sh
index 40e78746..5255ae6e 100755
--- a/recipe/build-mpi.sh
+++ b/recipe/build-mpi.sh
@@ -1,32 +1,37 @@
 #!/bin/bash
 
-# unset unused old fortran compiler vars
-unset F90 F77
-
 set -ex
 
-export FCFLAGS="$FFLAGS"
-
 # avoid absolute-paths in compilers
 export CC=$(basename "$CC")
 export CXX=$(basename "$CXX")
 export FC=$(basename "$FC")
 
-./autogen.pl --force
+# unset unused Fortran compiler variables
+unset FFLAGS F77 F90 F95
 
+# tweak compiler flags
+export LIBRARY_PATH="$PREFIX/lib"
 if [[ "$target_platform" == osx-* ]]; then
-    if [[ ! -z "$CONDA_BUILD_SYSROOT" ]]; then
+    if [[ -n "$CONDA_BUILD_SYSROOT" ]]; then
         export CFLAGS="$CFLAGS -isysroot $CONDA_BUILD_SYSROOT"
         export CXXFLAGS="$CXXFLAGS -isysroot $CONDA_BUILD_SYSROOT"
     fi
 fi
 
-if [[ -z $CUDA_HOME ]]; then
-    build_with_cuda=""
-else
+# UCX support
+build_with_ucx=""
+if [[ "$target_platform" == linux-* ]]; then
+    build_with_ucx="--with-ucx=$PREFIX"
+fi
+
+# CUDA support
+build_with_cuda=""
+if [[ -n "$CUDA_HOME" ]]; then
+    build_with_cuda="--with-cuda=yes"
     export CFLAGS="$CFLAGS -I$CUDA_HOME/include"
     export CXXFLAGS="$CXXFLAGS -I$CUDA_HOME/include"
-    build_with_cuda="--with-cuda --with-ucx=$PREFIX"
+    export LDFLAGS="$LDFLAGS -L$CUDA_HOME/lib64/stubs"
 fi
 
 if [[ $CONDA_BUILD_CROSS_COMPILATION == "1"  && $target_platform == osx-arm64 ]]; then
@@ -132,15 +137,6 @@ if [[ $CONDA_BUILD_CROSS_COMPILATION == "1"  && $target_platform == osx-arm64 ]]
     export ompi_cv_fortran_use_only=yes
 fi
 
-export LIBRARY_PATH="$PREFIX/lib"
-
-## Replaced by the patch from open-mpi/ompi#8361
-# if [[ "$target_platform" == *-64 ]]; then
-#     # -march=skylake-avx512 -march=nocona invalidates AVX512 flag. Remove -march flags and -mtune flags
-#     export CFLAGS=$(echo $CFLAGS | sed 's/-march=[a-z0-9\-]*//g')
-#     export CFLAGS=$(echo $CFLAGS | sed 's/-mtune=[a-z0-9\-]*//g')
-# fi
-
 ./configure --prefix=$PREFIX \
             --disable-dependency-tracking \
             --enable-mpi-fortran \
@@ -151,23 +147,32 @@ export LIBRARY_PATH="$PREFIX/lib"
             --with-wrapper-fcflags="-I$PREFIX/include" \
             --with-wrapper-ldflags="-L$PREFIX/lib -Wl,-rpath,$PREFIX/lib" \
             --with-sge \
-            $build_with_cuda || (cat config.log; false)
+            --with-hwloc=$PREFIX \
+            --with-libevent=$PREFIX \
+            --with-zlib=$PREFIX \
+            --enable-mca-dso \
+            $build_with_ucx \
+            $build_with_cuda \
+    || (cat config.log; false)
 
 make -j"${CPU_COUNT:-1}"
 make install
 
-if [ ! -z "$build_with_cuda" ]; then
-    echo "setting the mca opal_warn_on_missing_libcuda to 0..."
-    echo "opal_warn_on_missing_libcuda = 0" >> $PREFIX/etc/openmpi-mca-params.conf
-    echo "setting the mca opal_cuda_support to 0..."
-    echo "opal_cuda_support = 0" >> $PREFIX/etc/openmpi-mca-params.conf
-
-    echo "setting the mca pml to ^ucx..."
+POST_LINK=$PREFIX/bin/.openmpi-post-link.sh
+if [ -n "$build_with_ucx" ]; then
+    echo "setting MCA pml to ^ucx..."
     echo "pml = ^ucx" >> $PREFIX/etc/openmpi-mca-params.conf
-    echo "setting the mca osc to ^ucx..."
+    echo "setting MCA osc to ^ucx..."
     echo "osc = ^ucx" >> $PREFIX/etc/openmpi-mca-params.conf
-
-    POST_LINK=$PREFIX/bin/.openmpi-post-link.sh
-    cp $RECIPE_DIR/post-link.sh $POST_LINK
+    cat $RECIPE_DIR/post-link-ucx.sh >> $POST_LINK
+fi
+if [ -n "$build_with_cuda" ]; then
+    echo "setting MCA opal_warn_on_missing_libcuda to 0..."
+    echo "opal_warn_on_missing_libcuda = 0" >> $PREFIX/etc/openmpi-mca-params.conf
+    echo "setting MCA opal_cuda_support to 0..."
+    echo "opal_cuda_support = 0" >> $PREFIX/etc/openmpi-mca-params.conf
+    cat $RECIPE_DIR/post-link-cuda.sh >> $POST_LINK
+fi
+if [ -f $POST_LINK ]; then
     chmod +x $POST_LINK
 fi
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 9badca02..b16eacc9 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -2,13 +2,16 @@ mpi_type:
   - external  # [linux]
   - conda
 docker_image:  # [linux]
-  - quay.io/condaforge/linux-anvil-cos7-cuda:10.2      # [linux64]
-  - quay.io/condaforge/linux-anvil-ppc64le-cuda:11.0   # [ppc64le]
-  - quay.io/condaforge/linux-anvil-aarch64-cuda:11.0   # [aarch64]
-cudatoolkit:  # [linux]
-  - 10.2      # [linux64]
-  - 11.0      # [ppc64le]
-  - 11.0      # [aarch64]
+  - quay.io/condaforge/linux-anvil-cuda:11.2           # [linux64]
+  - quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2   # [ppc64le]
+  - quay.io/condaforge/linux-anvil-aarch64-cuda:11.2   # [aarch64]
+enable_ucx:
+  - True       # [linux]
+  - False      # [not linux]
 enable_cuda:
-  - True      # [linux]
-  - False     # [not linux]
+  - True       # [linux]
+  - False      # [not linux]
+cuda_version:  # [linux]
+  - 11.2       # [linux64]
+  - 11.2       # [ppc64le]
+  - 11.2       # [aarch64]
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 05739cc3..0c33a2b0 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,6 +1,6 @@
-{% set version = "4.1.6" %}
+{% set version = "5.0.0" %}
 {% set major = version.rpartition('.')[0] %}
-{% set build = 1 %}
+{% set build = 0 %}
 
 # give conda package a higher build number
 {% if mpi_type == 'conda' %}
@@ -15,7 +15,7 @@ package:
 source:
   fn: openmpi-{{ version }}.tar.bz2
   url: https://www.open-mpi.org/software/ompi/v{{ major }}/downloads/openmpi-{{ version }}.tar.bz2
-  sha256: f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415
+  sha256: 9d845ca94bc1aeb445f83d98d238cd08f6ec7ad0f73b0f79ec1668dbfdacd613
 
 build:
   number: {{ build }}
@@ -29,33 +29,43 @@ outputs:
       run_exports:
         - {{ pin_subpackage('openmpi', min_pin='x.x.x', max_pin='x') }}
       ignore_run_exports:
-        - ucx  # [enable_cuda]
+        - ucx  # [enable_ucx]
       script_env:
-        - CUDA_HOME     # [enable_cuda]
+        - CUDA_HOME  # [enable_cuda]
     requirements:
       build:
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
         - {{ compiler('fortran') }}
-        - autoconf  # [unix]
-        - automake  # [unix]
+        #- autoconf  # [unix]
+        #- automake  # [unix]
         - libtool   # [unix]
         - make  # [unix]
-        - perl 5.26.2
       host:
+        #- openpmix
+        #- prrte
+        - libhwloc
+        - libevent
         - zlib
-        - ucx                 # [enable_cuda]
-        - ucx-proc =*=gpu     # [enable_cuda]
+        - ucx  # [enable_ucx]
+        - cuda-version {{ cuda_version }}  # [enable_cuda]
       run:
-        - zlib
         - mpi 1.0 openmpi
+        #- openpmix
+        #- prrte
+        - libhwloc
+        - libevent
+        - zlib
       run_constrained:
-        - cudatoolkit  >= {{ cudatoolkit }}         # [enable_cuda]
-        - {{ pin_compatible("ucx", max_pin="x") }}  # [enable_cuda]
+        - {{ pin_compatible("ucx", max_pin="x.x") }}  # [enable_ucx]
+        # Open MPI only uses CUDA Driver APIs, set the minimal driver version
+        - __cuda  >= {{ cuda_version }}  # [enable_cuda]
+        # Ensure a consistent CUDA environment
+        - cuda-version  >= {{ cuda_version }}  # [enable_cuda]
+        - cudatoolkit   >= {{ cuda_version }}  # [enable_cuda]
     test:
       script: run_test.sh
       files:
-        - mpiexec.sh
         - tests/helloworld.sh
 
   - name: openmpi-mpicc
@@ -73,7 +83,6 @@ outputs:
     test:
       script: run_test.sh
       files:
-        - mpiexec.sh
         - tests/helloworld.c
 
   - name: openmpi-mpicxx
@@ -91,7 +100,6 @@ outputs:
     test:
       script: run_test.sh
       files:
-        - mpiexec.sh
         - tests/helloworld.cxx
 
   - name: openmpi-mpifort
@@ -108,7 +116,6 @@ outputs:
     test:
       script: run_test.sh
       files:
-        - mpiexec.sh
         - tests/helloworld.f
         - tests/helloworld.f90
   {% else %}
diff --git a/recipe/mpiexec.sh b/recipe/mpiexec.sh
deleted file mode 100755
index 5b401fa0..00000000
--- a/recipe/mpiexec.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-# pipe stdout, stderr through cat to avoid O_NONBLOCK issues
-exec mpiexec --allow-run-as-root "$@" 2>&1</dev/null | cat
diff --git a/recipe/post-link-cuda.sh b/recipe/post-link-cuda.sh
new file mode 100644
index 00000000..6587573a
--- /dev/null
+++ b/recipe/post-link-cuda.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Post-link notice for CUDA-enabled builds: appends a message to
+# $PREFIX/.messages.txt explaining that CUDA awareness is compiled in but
+# disabled by default, and how to switch it on.
+# The redirect target is quoted because bash aborts with "ambiguous redirect"
+# when an unquoted $PREFIX expands to a path containing whitespace.
+
+cat << EOF >> "$PREFIX/.messages.txt"
+
+On Linux, Open MPI is built with CUDA awareness but it is disabled by default.
+To enable it, please set the environment variable
+OMPI_MCA_opal_cuda_support=true
+before launching your MPI processes.
+Equivalently, you can set the MCA parameter in the command line:
+mpiexec --mca opal_cuda_support 1 ...
+Note that you might also need to set UCX_MEMTYPE_CACHE=n for CUDA awareness via
+UCX. Please consult UCX documentation for further details.
+
+EOF
diff --git a/recipe/post-link-ucx.sh b/recipe/post-link-ucx.sh
new file mode 100644
index 00000000..2cf94669
--- /dev/null
+++ b/recipe/post-link-ucx.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Post-link notice for UCX-enabled builds: appends a message to
+# $PREFIX/.messages.txt explaining that UCX support is compiled in but
+# disabled by default, and how to switch it on.
+# The redirect target is quoted because bash aborts with "ambiguous redirect"
+# when an unquoted $PREFIX expands to a path containing whitespace.
+
+cat << EOF >> "$PREFIX/.messages.txt"
+
+On Linux, Open MPI is built with UCX support but it is disabled by default.
+To enable it, first install UCX (conda install -c conda-forge ucx).
+Afterwards, set the environment variables
+OMPI_MCA_pml=ucx OMPI_MCA_osc=ucx
+before launching your MPI processes.
+Equivalently, you can set the MCA parameters in the command line:
+mpiexec --mca pml ucx --mca osc ucx ...
+
+EOF
diff --git a/recipe/post-link.sh b/recipe/post-link.sh
deleted file mode 100644
index 8a45c74a..00000000
--- a/recipe/post-link.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-echo " " >> $PREFIX/.messages.txt
-echo "For Linux 64, Open MPI is built with CUDA awareness but this support is disabled by default." >> $PREFIX/.messages.txt
-echo "To enable it, please set the environment variable OMPI_MCA_opal_cuda_support=true before" >> $PREFIX/.messages.txt
-echo "launching your MPI processes. Equivalently, you can set the MCA parameter in the command line:" >> $PREFIX/.messages.txt
-echo "mpiexec --mca opal_cuda_support 1 ..." >> $PREFIX/.messages.txt
-echo " " >> $PREFIX/.messages.txt
-
-echo "In addition, the UCX support is also built but disabled by default." >> $PREFIX/.messages.txt
-echo "To enable it, first install UCX (conda install -c conda-forge ucx). Then, set the environment" >> $PREFIX/.messages.txt
-echo "variables OMPI_MCA_pml=\"ucx\" OMPI_MCA_osc=\"ucx\" before launching your MPI processes." >> $PREFIX/.messages.txt
-echo "Equivalently, you can set the MCA parameters in the command line:" >> $PREFIX/.messages.txt
-echo "mpiexec --mca pml ucx --mca osc ucx ..." >> $PREFIX/.messages.txt
-echo "Note that you might also need to set UCX_MEMTYPE_CACHE=n for CUDA awareness via UCX." >> $PREFIX/.messages.txt
-echo "Please consult UCX's documentation for detail." >> $PREFIX/.messages.txt
-echo " " >> $PREFIX/.messages.txt
diff --git a/recipe/run_test.sh b/recipe/run_test.sh
index a4a67eb1..33b609a0 100755
--- a/recipe/run_test.sh
+++ b/recipe/run_test.sh
@@ -1,26 +1,34 @@
 #!/bin/bash
 set -ex
 
-export OMPI_MCA_plm=isolated
-export OMPI_MCA_btl_vader_single_copy_mechanism=none
-export OMPI_MCA_rmaps_base_oversubscribe=yes
-MPIEXEC="${PWD}/mpiexec.sh"
+export OMPI_MCA_pml=ob1
+export OMPI_MCA_btl=sm,self
+export OMPI_MCA_plm_ssh_agent=false
+export OMPI_MCA_rmaps_default_mapping_policy=:oversubscribe
+export OMPI_ALLOW_RUN_AS_ROOT=1
+export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
+MPIEXEC="mpiexec"
 
 pushd "tests"
 
 if [[ $PKG_NAME == "openmpi" ]]; then
-  command -v ompi_info
-  ompi_info
 
-  if [[ ! -z "$(conda list | grep ucx)" ]]; then
+  if [[ -n "$(conda list | grep ucx)" ]]; then
     echo "Improper UCX dependency!"
     exit 1
   fi
-  if [[ ! -z "$(conda list | grep cudatoolkit)" ]]; then
-    echo "Improper cuda dependency!"
+
+  if [[ -n "$(conda list | grep cuda-version)" ]]; then
+    echo "Improper CUDA dependency!"
     exit 1
   fi
 
+  command -v ompi_info
+  ompi_info
+
+  command -v prte_info
+  prte_info
+
   command -v mpiexec
   $MPIEXEC --help
   $MPIEXEC -n 4 ./helloworld.sh